@digimakers/core 0.3.22 → 0.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/code-formatter.d.ts.map +1 -1
- package/dist/agents/code-formatter.js +4 -8
- package/dist/agents/code-formatter.js.map +1 -1
- package/dist/google.d.ts +9 -0
- package/dist/google.d.ts.map +1 -0
- package/dist/google.js +13 -0
- package/dist/google.js.map +1 -0
- package/dist/parsing/docling-runners.d.ts.map +1 -1
- package/dist/parsing/docling-runners.js +38 -16
- package/dist/parsing/docling-runners.js.map +1 -1
- package/dist/parsing/docx-parser.d.ts.map +1 -1
- package/dist/parsing/docx-parser.js +22 -16
- package/dist/parsing/docx-parser.js.map +1 -1
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"code-formatter.d.ts","sourceRoot":"","sources":["../../src/agents/code-formatter.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"code-formatter.d.ts","sourceRoot":"","sources":["../../src/agents/code-formatter.ts"],"names":[],"mappings":"AAMA,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,GAAG,IAAI,mBAevF"}
|
|
@@ -1,17 +1,13 @@
|
|
|
1
1
|
import { generateText } from 'ai';
|
|
2
|
-
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
|
3
2
|
import { codeFormatterPrompt, codeFormatterSystemPrompt } from '../parsing/prompts.js';
|
|
4
3
|
import { logger } from '../logger.js';
|
|
5
|
-
|
|
6
|
-
return createGoogleGenerativeAI({
|
|
7
|
-
apiKey: process.env.GEMINI_API_KEY,
|
|
8
|
-
});
|
|
9
|
-
}
|
|
4
|
+
import { getGoogleClient, getGoogleModelIds } from '../google.js';
|
|
10
5
|
export async function formatDocumentCode(document, footerLanguage) {
|
|
11
|
-
logger.debug('Formatting document code blocks with code formatter LLM');
|
|
12
6
|
if (!footerLanguage || footerLanguage != 'scratch') {
|
|
7
|
+
const model = getGoogleModelIds().codeFormatterLlm;
|
|
8
|
+
logger.debug(`Formatting the code blocks of document text using ${model} model`);
|
|
13
9
|
const { text } = await generateText({
|
|
14
|
-
model: getGoogleClient()(
|
|
10
|
+
model: getGoogleClient()(model),
|
|
15
11
|
system: codeFormatterSystemPrompt,
|
|
16
12
|
prompt: codeFormatterPrompt(document),
|
|
17
13
|
temperature: 0,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"code-formatter.js","sourceRoot":"","sources":["../../src/agents/code-formatter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"code-formatter.js","sourceRoot":"","sources":["../../src/agents/code-formatter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,uBAAuB,CAAC;AACvF,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAElE,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB,EAAE,cAA6B;IACtF,IAAI,CAAC,cAAc,IAAI,cAAc,IAAI,SAAS,EAAE,CAAC;QACnD,MAAM,KAAK,GAA8B,iBAAiB,EAAE,CAAC,gBAAgB,CAAC;QAC9E,MAAM,CAAC,KAAK,CAAC,qDAAqD,KAAK,QAAQ,CAAC,CAAC;QACjF,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,YAAY,CAAC;YAClC,KAAK,EAAE,eAAe,EAAE,CAAC,KAAK,CAAC;YAC/B,MAAM,EAAE,yBAAyB;YACjC,MAAM,EAAE,mBAAmB,CAAC,QAAQ,CAAC;YACrC,WAAW,EAAE,CAAC;SACf,CAAC,CAAC;QACH,MAAM,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,mEAAmE,CAAC,CAAC;IAClF,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
package/dist/google.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { GoogleGenerativeAIModelId } from '@ai-sdk/google/internal';
|
|
2
|
+
export declare function getGoogleClient(): import("@ai-sdk/google").GoogleGenerativeAIProvider;
|
|
3
|
+
type ModelIdResponse = {
|
|
4
|
+
mainLlm: GoogleGenerativeAIModelId;
|
|
5
|
+
codeFormatterLlm: GoogleGenerativeAIModelId;
|
|
6
|
+
};
|
|
7
|
+
export declare function getGoogleModelIds(): ModelIdResponse;
|
|
8
|
+
export {};
|
|
9
|
+
//# sourceMappingURL=google.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"google.d.ts","sourceRoot":"","sources":["../src/google.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,yBAAyB,CAAC;AAEpE,wBAAgB,eAAe,wDAI9B;AAED,KAAK,eAAe,GAAG;IACrB,OAAO,EAAE,yBAAyB,CAAC;IACnC,gBAAgB,EAAE,yBAAyB,CAAC;CAC7C,CAAC;AACF,wBAAgB,iBAAiB,IAAI,eAAe,CAKnD"}
|
package/dist/google.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
|
2
|
+
export function getGoogleClient() {
|
|
3
|
+
return createGoogleGenerativeAI({
|
|
4
|
+
apiKey: process.env.GEMINI_API_KEY,
|
|
5
|
+
});
|
|
6
|
+
}
|
|
7
|
+
export function getGoogleModelIds() {
|
|
8
|
+
return {
|
|
9
|
+
mainLlm: process.env.MAIN_GEMINI_MODEL || 'gemini-2.5-pro',
|
|
10
|
+
codeFormatterLlm: process.env.CODE_FORMATTER_GEMINI_MODEL || 'gemini-2.5-flash-lite',
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=google.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"google.js","sourceRoot":"","sources":["../src/google.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAG1D,MAAM,UAAU,eAAe;IAC7B,OAAO,wBAAwB,CAAC;QAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;KACnC,CAAC,CAAC;AACL,CAAC;AAMD,MAAM,UAAU,iBAAiB;IAC/B,OAAO;QACL,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,gBAAgB;QAC1D,gBAAgB,EAAE,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,uBAAuB;KACrF,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docling-runners.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"docling-runners.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AA0FA,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAkCjF"}
|
|
@@ -1,13 +1,36 @@
|
|
|
1
1
|
import path from 'path';
|
|
2
2
|
import { execFileSync } from 'child_process';
|
|
3
|
-
import { existsSync, statSync } from 'fs';
|
|
3
|
+
import { existsSync, readFileSync, statSync } from 'fs';
|
|
4
4
|
import { fileURLToPath } from 'url';
|
|
5
5
|
import { logger } from '../logger.js';
|
|
6
6
|
const __filename = fileURLToPath(import.meta.url);
|
|
7
7
|
const __dirname = path.dirname(__filename);
|
|
8
|
+
/**
|
|
9
|
+
* Detect if running in development mode (monorepo).
|
|
10
|
+
* Dev mode uses uv directly instead of pre-built binaries.
|
|
11
|
+
*/
|
|
12
|
+
function isDevMode() {
|
|
13
|
+
// Explicit env var takes priority
|
|
14
|
+
if (process.env.DIGIMAKER_DEV === '1')
|
|
15
|
+
return true;
|
|
16
|
+
if (process.env.DIGIMAKER_DEV === '0')
|
|
17
|
+
return false;
|
|
18
|
+
// Check if we're in the monorepo by looking for root package.json with workspaces
|
|
19
|
+
try {
|
|
20
|
+
const rootPkgPath = path.resolve(__dirname, '..', '..', '..', '..', 'package.json');
|
|
21
|
+
if (existsSync(rootPkgPath)) {
|
|
22
|
+
const pkg = JSON.parse(readFileSync(rootPkgPath, 'utf-8'));
|
|
23
|
+
if (pkg.workspaces && pkg.name === 'digimaker-monorepo') {
|
|
24
|
+
return true;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
catch {
|
|
29
|
+
// Ignore errors, assume not dev mode
|
|
30
|
+
}
|
|
31
|
+
return false;
|
|
32
|
+
}
|
|
8
33
|
async function resolveDoclingBinary() {
|
|
9
|
-
const platformTag = `${process.platform}-${process.arch}`;
|
|
10
|
-
const binaryName = process.platform === 'win32' ? 'docling-cleaner.exe' : 'docling-cleaner';
|
|
11
34
|
try {
|
|
12
35
|
const { ensureDoclingCleaner } = await import('@digimakers/docling-cleaner');
|
|
13
36
|
const binaryPath = await ensureDoclingCleaner();
|
|
@@ -22,20 +45,8 @@ async function resolveDoclingBinary() {
|
|
|
22
45
|
}
|
|
23
46
|
}
|
|
24
47
|
catch (error) {
|
|
25
|
-
logger.warn({ err: error }, 'Docling
|
|
48
|
+
logger.warn({ err: error }, 'Docling binary download failed');
|
|
26
49
|
}
|
|
27
|
-
const distBinary = path.resolve(__dirname, '..', 'docling-cleaner', 'bin', platformTag, binaryName);
|
|
28
|
-
if (existsSync(distBinary))
|
|
29
|
-
return distBinary;
|
|
30
|
-
const distOnedirBinary = path.resolve(__dirname, '..', 'docling-cleaner', 'bin', platformTag, 'docling-cleaner', binaryName);
|
|
31
|
-
if (existsSync(distOnedirBinary))
|
|
32
|
-
return distOnedirBinary;
|
|
33
|
-
const srcBinary = path.resolve(__dirname, '..', '..', 'src', 'docling-cleaner', 'bin', platformTag, binaryName);
|
|
34
|
-
if (existsSync(srcBinary))
|
|
35
|
-
return srcBinary;
|
|
36
|
-
const srcOnedirBinary = path.resolve(__dirname, '..', '..', 'src', 'docling-cleaner', 'bin', platformTag, 'docling-cleaner', binaryName);
|
|
37
|
-
if (existsSync(srcOnedirBinary))
|
|
38
|
-
return srcOnedirBinary;
|
|
39
50
|
return null;
|
|
40
51
|
}
|
|
41
52
|
function resolveDoclingCleanerDir() {
|
|
@@ -72,6 +83,17 @@ function getDoclingMarkdownFromUv(filePath) {
|
|
|
72
83
|
}
|
|
73
84
|
}
|
|
74
85
|
export async function getDoclingMarkdown(filePath) {
|
|
86
|
+
// In dev mode, always use uv to run local Python directly
|
|
87
|
+
if (isDevMode()) {
|
|
88
|
+
logger.debug('Running in dev mode, using uv for docling');
|
|
89
|
+
const result = getDoclingMarkdownFromUv(filePath);
|
|
90
|
+
if (result === null) {
|
|
91
|
+
throw new Error('Docling uv runner failed. Developers must have uv installed. ' +
|
|
92
|
+
'Run: curl -LsSf https://astral.sh/uv/install.sh | sh');
|
|
93
|
+
}
|
|
94
|
+
return result;
|
|
95
|
+
}
|
|
96
|
+
// Production mode: try binary first, then fall back to uv
|
|
75
97
|
const binaryPath = await resolveDoclingBinary();
|
|
76
98
|
if (binaryPath) {
|
|
77
99
|
try {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docling-runners.js","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"docling-runners.js","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAE3C;;;GAGG;AACH,SAAS,SAAS;IAChB,kCAAkC;IAClC,IAAI,OAAO,CAAC,GAAG,CAAC,aAAa,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC;IACnD,IAAI,OAAO,CAAC,GAAG,CAAC,aAAa,KAAK,GAAG;QAAE,OAAO,KAAK,CAAC;IAEpD,kFAAkF;IAClF,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,CAAC,CAAC;QACpF,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC5B,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC;YAC3D,IAAI,GAAG,CAAC,UAAU,IAAI,GAAG,CAAC,IAAI,KAAK,oBAAoB,EAAE,CAAC;gBACxD,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,qCAAqC;IACvC,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AACD,KAAK,UAAU,oBAAoB;IACjC,IAAI,CAAC;QACH,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;QAC7E,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;QAChD,IAAI,UAAU,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC;gBACH,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC,MAAM,EAAE;oBAAE,OAAO,UAAU,CAAC;YACvD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,gCAAgC,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,iBAAiB,CAAC,CAAC;IACxE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,cAAc,CAAC;IAE/E,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,CAAC,CAAC;IACpF,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,aAAa,CAAC;IAE7E,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAgB;IAChD,MAAM,UAAU,GAAG,wBAAwB,EAAE,CAAC;IAC9C,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CACT,qFAAqF,CACtF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,EAAE;YACnE,GAAG,EAAE,UAAU;YACf,QAAQ,EAAE,OAAO;YACjB,GAAG,EAAE;gBACH,GAAG,OAAO,CAAC,GAAG;gBACd,UAAU,EAAE,GAAG;gBACf,gBAAgB,EAAE,OAAO;aAC1B;YACD,OAAO,EAAE,MAAM;YACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CACT,EAAE,GAAG,EAAE,KAAK,EAAE,EACd,sFAAsF,CACvF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,0DAA0D;IAC1D,IAAI,SAAS,EAAE,EAAE,CAAC;QAChB,MAAM,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC1D,MAAM,MAAM,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;QAClD,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CACb,+DAA+D;gBAC7D,sDAAsD,CACzD,CAAC;QACJ,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,0DAA0D;IAC1D,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAChD,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,EAAE;gBAC1C,QAAQ,EAAE,OAAO;gBACjB,GAAG,EAAE;oBACH,GAAG,OAAO,CAAC,GAAG;oBACd,UAAU,EAAE,GAAG;oBACf,gBAAgB,EAAE,OAAO;iBAC1B;gBACD,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,+CAA+C,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAC;AAC5C,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,MAAM,EAAiB,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,MAAM,EAAiB,MAAM,qBAAqB,CAAC;AAqB5D,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;CACpB;AAuBD,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CA8JtE"}
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import fs from 'fs/promises';
|
|
2
2
|
import mammoth from 'mammoth';
|
|
3
3
|
import { generateText, Output } from 'ai';
|
|
4
|
-
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
|
5
4
|
import { logger } from '../logger.js';
|
|
6
5
|
import { extractLanguageFromFooter } from './footer-parser.js';
|
|
7
6
|
import { LessonLLMSchema, LessonLLMSchemaWithoutLanguage, } from '../schemas/lesson.js';
|
|
@@ -10,11 +9,7 @@ import { getDoclingMarkdown } from './docling-runners.js';
|
|
|
10
9
|
import { buildDocxParserPrompt, docxParserSystemPrompt } from './prompts.js';
|
|
11
10
|
import { formatDocumentCode } from '../agents/code-formatter.js';
|
|
12
11
|
import { enrichDebugIssues, inferLessonType, normaliseLessonContent, normaliseLessonForType, } from './post-processors.js';
|
|
13
|
-
|
|
14
|
-
return createGoogleGenerativeAI({
|
|
15
|
-
apiKey: process.env.GEMINI_API_KEY,
|
|
16
|
-
});
|
|
17
|
-
}
|
|
12
|
+
import { getGoogleClient, getGoogleModelIds } from '../google.js';
|
|
18
13
|
// Extract images from docx as base64 data URIs
|
|
19
14
|
async function extractImages(buffer) {
|
|
20
15
|
const images = [];
|
|
@@ -33,7 +28,6 @@ async function extractImages(buffer) {
|
|
|
33
28
|
export async function parseDocx(filePath) {
|
|
34
29
|
logger.info(`Parsing: ${filePath}`);
|
|
35
30
|
const buffer = await fs.readFile(filePath);
|
|
36
|
-
// Extract images and footer language in parallel, try docling markdown
|
|
37
31
|
const [allImages, footerLanguage, doclingMarkdown] = await Promise.all([
|
|
38
32
|
extractImages(buffer),
|
|
39
33
|
extractLanguageFromFooter(filePath),
|
|
@@ -53,11 +47,11 @@ export async function parseDocx(filePath) {
|
|
|
53
47
|
textForLLM = doclingMarkdown;
|
|
54
48
|
logger.info('Using docling markdown with placeholder-based image mapping');
|
|
55
49
|
logger.info(textForLLM);
|
|
56
|
-
logger.info('Formatting document code blocks with agent');
|
|
57
50
|
textForLLM = await formatDocumentCode(doclingMarkdown, footerLanguage);
|
|
58
51
|
logger.info(textForLLM);
|
|
59
52
|
}
|
|
60
53
|
else {
|
|
54
|
+
// Mammoth fallback if docling is not available, no parsedSections
|
|
61
55
|
const { value: text } = await mammoth.extractRawText({ buffer });
|
|
62
56
|
textForLLM = text;
|
|
63
57
|
logger.info('Falling back to mammoth text extraction');
|
|
@@ -68,16 +62,18 @@ export async function parseDocx(filePath) {
|
|
|
68
62
|
logger.info(`Programming language from footer: ${footerLanguage}`);
|
|
69
63
|
}
|
|
70
64
|
else {
|
|
71
|
-
logger.warn('
|
|
65
|
+
logger.warn('Programming language not found from footer, LLM must determine');
|
|
72
66
|
}
|
|
73
|
-
// If we find the programming language in the footer,
|
|
74
|
-
// to
|
|
67
|
+
// If we find the programming language in the footer,
|
|
68
|
+
// we don't need the LLM to figure it out.
|
|
75
69
|
const llmSchema = footerLanguage ? LessonLLMSchemaWithoutLanguage : LessonLLMSchema;
|
|
76
70
|
let output;
|
|
77
71
|
try {
|
|
78
72
|
// Use LLM to extract structured data
|
|
73
|
+
const model = getGoogleModelIds().mainLlm;
|
|
74
|
+
logger.info(`Extracting structured data using ${model} model`);
|
|
79
75
|
const response = await generateText({
|
|
80
|
-
model: getGoogleClient()(
|
|
76
|
+
model: getGoogleClient()(model),
|
|
81
77
|
output: Output.object({
|
|
82
78
|
schema: llmSchema,
|
|
83
79
|
}),
|
|
@@ -89,6 +85,7 @@ export async function parseDocx(filePath) {
|
|
|
89
85
|
output = response.output;
|
|
90
86
|
}
|
|
91
87
|
catch (error) {
|
|
88
|
+
// Handle LLM errors
|
|
92
89
|
const err = error;
|
|
93
90
|
logger.error({ err, filePath }, 'LLM extraction failed');
|
|
94
91
|
const issues = err?.cause?.issues ?? err?.issues;
|
|
@@ -110,12 +107,21 @@ export async function parseDocx(filePath) {
|
|
|
110
107
|
data = enrichDebugIssues(textForLLM, data);
|
|
111
108
|
logger.info(`Inferred lesson type as: '${data.lessonType}'`);
|
|
112
109
|
logger.info(`Successfully extracted lesson: ${data.topic} - ${data.project}`);
|
|
113
|
-
// Set programming language from footer if found
|
|
114
110
|
if (footerLanguage) {
|
|
111
|
+
// Set programming language from footer if found
|
|
115
112
|
data.programmingLanguage = footerLanguage;
|
|
116
113
|
}
|
|
117
|
-
//
|
|
118
|
-
if (
|
|
114
|
+
// Debugging lessons have no images, exit early
|
|
115
|
+
if (data.lessonType === 'debugging lesson') {
|
|
116
|
+
logger.info(data);
|
|
117
|
+
return {
|
|
118
|
+
data,
|
|
119
|
+
sourcePath: filePath,
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
// Image assignment (two methods)
|
|
123
|
+
if (parsedSections) {
|
|
124
|
+
// Assign images using placeholder-based mapping if available
|
|
119
125
|
// Assign preface image slots
|
|
120
126
|
if (parsedSections.preface.imageSlots.length > 0) {
|
|
121
127
|
data.prefaceImageSlots = parsedSections.preface.imageSlots;
|
|
@@ -137,7 +143,7 @@ export async function parseDocx(filePath) {
|
|
|
137
143
|
}
|
|
138
144
|
}
|
|
139
145
|
}
|
|
140
|
-
else
|
|
146
|
+
else {
|
|
141
147
|
// Fallback using old behavior, first image is project, rest are steps
|
|
142
148
|
// Not good if there are multiple images in preface section
|
|
143
149
|
logger.warn('Falling back to old image assignment behaviour');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAG1C,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAC/D,OAAO,EAEL,eAAe,EACf,8BAA8B,GAE/B,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAC7E,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EACL,iBAAiB,EACjB,eAAe,EACf,sBAAsB,EACtB,sBAAsB,GACvB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAOlE,+CAA+C;AAC/C,KAAK,UAAU,aAAa,CAAC,MAAc;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,MAAM,OAAO,CAAC,aAAa,CACzB,EAAE,MAAM,EAAE,EACV;QACE,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACtD,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;YACvC,MAAM,MAAM,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,QAAQ,KAAK,CAAC,WAAW,WAAW,MAAM,EAAE,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC;QAC1B,CAAC,CAAC;KACH,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6CAA6C;AAC7C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,CAAC,IAAI,CAAC,YAAY,QAAQ,EAAE,CAAC,CAAC;IACpC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC3C,MAAM,CAAC,SAAS,EAAE,cAAc,EAAE,eAAe,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACrE,aAAa,CAAC,MAAM,CAAC;QACrB,yBAAyB,CAAC,QAAQ,CAAC;QACnC,kBAAkB,CAAC,QAAQ,CAAC;KAC7B,CAAC,CAAC;IAEH,iEAAiE;IACjE,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,IAAI,UAAkB,CAAC;IACvB,IAAI,eAAe,EAAE,CAAC;QACpB,cAAc,GAAG,oBAAoB,CAAC,eAAe,CAAC,CAAC;QACvD,mBAAmB,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;QAC/C,MAAM,CAAC,KAAK,CACV;YACE,iBAAiB,EAAE,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM;YAC3D,qBAAqB,EAAE,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM;YACnE,WAAW,EAAE,SAAS,CAAC,MAAM;SAC9B,EACD,4BAA4B,CAC7B,CAAC;QACF,UAAU,GAAG,eAAe,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;QAC3E,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxB,UAAU,GAAG,MAAM,kBAAkB,CAAC,eAAe,EAAE,cAAc,CAAC,CAAC;QACvE,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;SAAM,CAAC;QACN,kEAAkE;QAClE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACjE,UAAU,GAAG,IAAI,CAAC;QAClB,MAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,aAAa,UAAU,CAAC,MAAM,mBAAmB,SAAS,CAAC,MAAM,SAAS,CAAC,CAAC;IACxF,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,qCAAqC,cAAc,EAAE,CAAC,CAAC;IACrE,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,IAAI,CAAC,gEAAgE,CAAC,CAAC;IAChF,CAAC;IAED,qDAAqD;IACrD,0CAA0C;IAC1C,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,8BAA8B,CAAC,CAAC,CAAC,eAAe,CAAC;IAEpF,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,qCAAqC;QACrC,MAAM,KAAK,GAAG,iBAAiB,EAAE,CAAC,OAAO,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,oCAAoC,KAAK,QAAQ,CAAC,CAAC;QAC/D,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC;YAClC,KAAK,EAAE,eAAe,EAAE,CAAC,KAAK,CAAC;YAC/B,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;YACF,MAAM,EAAE,sBAAsB;YAC9B,MAAM,EAAE,qBAAqB,CAAC,UAAU,CAAC;YACzC,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,CAAC;SACd,CAAC,CAAC;QACH,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,oBAAoB;QACpB,MAAM,GAAG,GAAG,KAAY,CAAC;QACzB,MAAM,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE,uBAAuB,CAAC,CAAC;QACzD,MAAM,MAAM,GAAG,GAAG,EAAE,KAAK,EAAE,MAAM,IAAI,GAAG,EAAE,MAAM,CAAC;QACjD,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,8BAA8B,CAAC,CAAC;QACrE,CAAC;QACD,MAAM,KAAK,GAAG,GAAG,EAAE,KAAK,EAAE,KAAK,IAAI,GAAG,EAAE,KAAK,CAAC;QAC9C,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,mCAAmC,CAAC,CAAC;QACzE,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;IAED,uCAAuC;IACvC,MAAM,eAAe,GAAG,sBAAsB,CAAC,MAAmB,CAAC,CAAC;IACpE,IAAI,IAAI,GAAG,sBAAsB,CAAC;QAChC,GAAG,eAAe;QAClB,UAAU,EAAE,eAAe,CAAC,UAAU,EAAE,cAAqC,EAAE,eAAe,CAAC;KACtF,CAAC,CAAC;IACb,IAAI,GAAG,iBAAiB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAC3C,MAAM,CAAC,IAAI,CAAC,6BAA6B,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;IAC7D,MAAM,CAAC,IAAI,CAAC,kCAAkC,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;IAE9E,IAAI,cAAc,EAAE,CAAC;QACnB,gDAAgD;QAChD,IAAI,CAAC,mBAAmB,GAAG,cAAqC,CAAC;IACnE,CAAC;IAED,+CAA+C;IAC/C,IAAI,IAAI,CAAC,UAAU,KAAK,kBAAkB,EAAE,CAAC;QAC3C,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClB,OAAO;YACL,IAAI;YACJ,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,iCAAiC;IACjC,IAAI,cAAc,EAAE,CAAC;QACnB,6DAA6D;QAC7D,6BAA6B;QAC7B,IAAI,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,iBAAiB,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC;QAC7D,CAAC;QACD,mCAAmC;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClF,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YACF,IAAI,oBAAoB,EAAE,CAAC;gBACzB,MAAM,KAAK,GAAG,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC;gBACnD,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;wBACzB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE;4BACnB,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;yBAC5B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,sEAAsE;QACtE,2DAA2D;QAC3D,MAAM,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;QAC9D,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,IAAI,CAAC,iBAAiB,GAAG,CAAC,EAAE,EAAE,EAAE,wBAAwB,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpF,CAAC;QACD,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACtC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YACvD,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YACF,IAAI,oBAAoB,EAAE,CAAC;gBACxB,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;wBACtB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,gBAAgB,KAAK,GAAG,CAAC,EAAE;4BAC/B,MAAM,EAAE,UAAU,CAAC,KAAK,CAAC;yBAC1B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClB,OAAO;QACL,IAAI;QACJ,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@digimakers/core",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.24",
|
|
4
4
|
"description": "Core library for Digimaker - docx to PDF conversion",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
},
|
|
23
23
|
"dependencies": {
|
|
24
24
|
"@ai-sdk/google": "^3.0.10",
|
|
25
|
-
"@digimakers/docling-cleaner": "^1.2.
|
|
25
|
+
"@digimakers/docling-cleaner": "^1.2.24",
|
|
26
26
|
"ai": "6.0.39",
|
|
27
27
|
"dotenv": "^17.2.3",
|
|
28
28
|
"express": "5.2.1",
|