@digimakers/core 0.3.20 → 0.3.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/code-formatter.d.ts.map +1 -1
- package/dist/agents/code-formatter.js +6 -4
- package/dist/agents/code-formatter.js.map +1 -1
- package/dist/docling-cleaner/cleaner.py +7 -1
- package/dist/parsing/docling-runners.d.ts.map +1 -1
- package/dist/parsing/docling-runners.js +10 -0
- package/dist/parsing/docling-runners.js.map +1 -1
- package/dist/parsing/docx-parser.d.ts.map +1 -1
- package/dist/parsing/docx-parser.js +6 -5
- package/dist/parsing/docx-parser.js.map +1 -1
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"code-formatter.d.ts","sourceRoot":"","sources":["../../src/agents/code-formatter.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"code-formatter.d.ts","sourceRoot":"","sources":["../../src/agents/code-formatter.ts"],"names":[],"mappings":"AAWA,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,GAAG,IAAI,mBAcvF"}
|
|
@@ -2,14 +2,16 @@ import { generateText } from 'ai';
|
|
|
2
2
|
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
|
3
3
|
import { codeFormatterPrompt, codeFormatterSystemPrompt } from '../parsing/prompts.js';
|
|
4
4
|
import { logger } from '../logger.js';
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
function getGoogleClient() {
|
|
6
|
+
return createGoogleGenerativeAI({
|
|
7
|
+
apiKey: process.env.GEMINI_API_KEY,
|
|
8
|
+
});
|
|
9
|
+
}
|
|
8
10
|
export async function formatDocumentCode(document, footerLanguage) {
|
|
9
11
|
logger.debug('Formatting document code blocks with code formatter LLM');
|
|
10
12
|
if (!footerLanguage || footerLanguage != 'scratch') {
|
|
11
13
|
const { text } = await generateText({
|
|
12
|
-
model:
|
|
14
|
+
model: getGoogleClient()('gemini-2.0-flash'),
|
|
13
15
|
system: codeFormatterSystemPrompt,
|
|
14
16
|
prompt: codeFormatterPrompt(document),
|
|
15
17
|
temperature: 0,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"code-formatter.js","sourceRoot":"","sources":["../../src/agents/code-formatter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,uBAAuB,CAAC;AACvF,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,
|
|
1
|
+
{"version":3,"file":"code-formatter.js","sourceRoot":"","sources":["../../src/agents/code-formatter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,uBAAuB,CAAC;AACvF,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,SAAS,eAAe;IACtB,OAAO,wBAAwB,CAAC;QAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;KACnC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB,EAAE,cAA6B;IACtF,MAAM,CAAC,KAAK,CAAC,yDAAyD,CAAC,CAAC;IACxE,IAAI,CAAC,cAAc,IAAI,cAAc,IAAI,SAAS,EAAE,CAAC;QACnD,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,YAAY,CAAC;YAClC,KAAK,EAAE,eAAe,EAAE,CAAC,kBAAkB,CAAC;YAC5C,MAAM,EAAE,yBAAyB;YACjC,MAAM,EAAE,mBAAmB,CAAC,QAAQ,CAAC;YACrC,WAAW,EAAE,CAAC;SACf,CAAC,CAAC;QACH,MAAM,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,mEAAmE,CAAC,CAAC;IAClF,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -16,6 +16,12 @@ from docling.document_converter import DocumentConverter
|
|
|
16
16
|
logging.getLogger('docling.backend.msword_backend').setLevel(logging.ERROR)
|
|
17
17
|
|
|
18
18
|
def main():
|
|
19
|
+
# Force UTF-8 stdout/stderr to avoid CP1252 mojibake on Windows.
|
|
20
|
+
if hasattr(sys.stdout, "reconfigure"):
|
|
21
|
+
sys.stdout.reconfigure(encoding="utf-8")
|
|
22
|
+
if hasattr(sys.stderr, "reconfigure"):
|
|
23
|
+
sys.stderr.reconfigure(encoding="utf-8")
|
|
24
|
+
|
|
19
25
|
if len(sys.argv) < 2:
|
|
20
26
|
print("Usage: python cleaner.py <path-to-docx>", file=sys.stderr)
|
|
21
27
|
sys.exit(1)
|
|
@@ -31,4 +37,4 @@ def main():
|
|
|
31
37
|
print(markdown)
|
|
32
38
|
|
|
33
39
|
if __name__ == "__main__":
|
|
34
|
-
main()
|
|
40
|
+
main()
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docling-runners.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"docling-runners.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAkHA,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAoBjF"}
|
|
@@ -57,6 +57,11 @@ function getDoclingMarkdownFromUv(filePath) {
|
|
|
57
57
|
return execFileSync('uv', ['run', 'python', 'cleaner.py', filePath], {
|
|
58
58
|
cwd: cleanerDir,
|
|
59
59
|
encoding: 'utf-8',
|
|
60
|
+
env: {
|
|
61
|
+
...process.env,
|
|
62
|
+
PYTHONUTF8: '1',
|
|
63
|
+
PYTHONIOENCODING: 'utf-8',
|
|
64
|
+
},
|
|
60
65
|
timeout: 120000,
|
|
61
66
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
62
67
|
});
|
|
@@ -72,6 +77,11 @@ export async function getDoclingMarkdown(filePath) {
|
|
|
72
77
|
try {
|
|
73
78
|
return execFileSync(binaryPath, [filePath], {
|
|
74
79
|
encoding: 'utf-8',
|
|
80
|
+
env: {
|
|
81
|
+
...process.env,
|
|
82
|
+
PYTHONUTF8: '1',
|
|
83
|
+
PYTHONIOENCODING: 'utf-8',
|
|
84
|
+
},
|
|
75
85
|
timeout: 120000,
|
|
76
86
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
77
87
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docling-runners.js","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAC3C,KAAK,UAAU,oBAAoB;IACjC,MAAM,WAAW,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;IAC1D,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,iBAAiB,CAAC;IAE5F,IAAI,CAAC;QACH,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;QAC7E,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;QAChD,IAAI,UAAU,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC;gBACH,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC,MAAM,EAAE;oBAAE,OAAO,UAAU,CAAC;YACvD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,oDAAoD,CAAC,CAAC;IACpF,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAC7B,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,MAAM,gBAAgB,GAAG,IAAI,CAAC,OAAO,CACnC,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,gBAAgB,CAAC;QAAE,OAAO,gBAAgB,CAAC;IAE1D,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAC5B,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,SAAS,CAAC;IAE5C,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAClC,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,eAAe,CAAC;QAAE,OAAO,eAAe,CAAC;IAExD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,iBAAiB,CAAC,CAAC;IACxE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,cAAc,CAAC;IAE/E,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,CAAC,CAAC;IACpF,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,aAAa,CAAC;IAE7E,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAgB;IAChD,MAAM,UAAU,GAAG,wBAAwB,EAAE,CAAC;IAC9C,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CACT,qFAAqF,CACtF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,EAAE;YACnE,GAAG,EAAE,UAAU;YACf,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,MAAM;YACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CACT,EAAE,GAAG,EAAE,KAAK,EAAE,EACd,sFAAsF,CACvF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAChD,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,EAAE;gBAC1C,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,+CAA+C,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAC;AAC5C,CAAC"}
|
|
1
|
+
{"version":3,"file":"docling-runners.js","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAC3C,KAAK,UAAU,oBAAoB;IACjC,MAAM,WAAW,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;IAC1D,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,iBAAiB,CAAC;IAE5F,IAAI,CAAC;QACH,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;QAC7E,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;QAChD,IAAI,UAAU,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC;gBACH,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC,MAAM,EAAE;oBAAE,OAAO,UAAU,CAAC;YACvD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,oDAAoD,CAAC,CAAC;IACpF,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAC7B,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,MAAM,gBAAgB,GAAG,IAAI,CAAC,OAAO,CACnC,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,gBAAgB,CAAC;QAAE,OAAO,gBAAgB,CAAC;IAE1D,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAC5B,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,SAAS,CAAC;IAE5C,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAClC,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,eAAe,CAAC;QAAE,OAAO,eAAe,CAAC;IAExD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,iBAAiB,CAAC,CAAC;IACxE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,cAAc,CAAC;IAE/E,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,CAAC,CAAC;IACpF,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,aAAa,CAAC;IAE7E,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAgB;IAChD,MAAM,UAAU,GAAG,wBAAwB,EAAE,CAAC;IAC9C,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CACT,qFAAqF,CACtF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,EAAE;YACnE,GAAG,EAAE,UAAU;YACf,QAAQ,EAAE,OAAO;YACjB,GAAG,EAAE;gBACH,GAAG,OAAO,CAAC,GAAG;gBACd,UAAU,EAAE,GAAG;gBACf,gBAAgB,EAAE,OAAO;aAC1B;YACD,OAAO,EAAE,MAAM;YACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CACT,EAAE,GAAG,EAAE,KAAK,EAAE,EACd,sFAAsF,CACvF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAChD,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,EAAE;gBAC1C,QAAQ,EAAE,OAAO;gBACjB,GAAG,EAAE;oBACH,GAAG,OAAO,CAAC,GAAG;oBACd,UAAU,EAAE,GAAG;oBACf,gBAAgB,EAAE,OAAO;iBAC1B;gBACD,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,+CAA+C,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAC;AAC5C,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,MAAM,EAAiB,MAAM,qBAAqB,CAAC;AAoB5D,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;CACpB;
|
|
1
|
+
{"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,MAAM,EAAiB,MAAM,qBAAqB,CAAC;AAoB5D,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;CACpB;AA6BD,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CA6JtE"}
|
|
@@ -10,10 +10,11 @@ import { getDoclingMarkdown } from './docling-runners.js';
|
|
|
10
10
|
import { buildDocxParserPrompt, docxParserSystemPrompt } from './prompts.js';
|
|
11
11
|
import { formatDocumentCode } from '../agents/code-formatter.js';
|
|
12
12
|
import { enrichDebugIssues, inferLessonType, normaliseLessonContent, normaliseLessonForType, } from './post-processors.js';
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
});
|
|
13
|
+
function getGoogleClient() {
|
|
14
|
+
return createGoogleGenerativeAI({
|
|
15
|
+
apiKey: process.env.GEMINI_API_KEY,
|
|
16
|
+
});
|
|
17
|
+
}
|
|
17
18
|
// Extract images from docx as base64 data URIs
|
|
18
19
|
async function extractImages(buffer) {
|
|
19
20
|
const images = [];
|
|
@@ -76,7 +77,7 @@ export async function parseDocx(filePath) {
|
|
|
76
77
|
try {
|
|
77
78
|
// Use LLM to extract structured data
|
|
78
79
|
const response = await generateText({
|
|
79
|
-
model:
|
|
80
|
+
model: getGoogleClient()('gemini-2.5-pro'),
|
|
80
81
|
output: Output.object({
|
|
81
82
|
schema: llmSchema,
|
|
82
83
|
}),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAE1D,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAC/D,OAAO,EAEL,eAAe,EACf,8BAA8B,GAE/B,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAC7E,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EACL,iBAAiB,EACjB,eAAe,EACf,sBAAsB,EACtB,sBAAsB,GACvB,MAAM,sBAAsB,CAAC;AAO9B,
|
|
1
|
+
{"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAE1D,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAC/D,OAAO,EAEL,eAAe,EACf,8BAA8B,GAE/B,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAC7E,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EACL,iBAAiB,EACjB,eAAe,EACf,sBAAsB,EACtB,sBAAsB,GACvB,MAAM,sBAAsB,CAAC;AAO9B,SAAS,eAAe;IACtB,OAAO,wBAAwB,CAAC;QAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;KACnC,CAAC,CAAC;AACL,CAAC;AAED,+CAA+C;AAC/C,KAAK,UAAU,aAAa,CAAC,MAAc;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,MAAM,OAAO,CAAC,aAAa,CACzB,EAAE,MAAM,EAAE,EACV;QACE,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACtD,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;YACvC,MAAM,MAAM,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,QAAQ,KAAK,CAAC,WAAW,WAAW,MAAM,EAAE,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC;QAC1B,CAAC,CAAC;KACH,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6CAA6C;AAC7C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,CAAC,IAAI,CAAC,YAAY,QAAQ,EAAE,CAAC,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3C,uEAAuE;IACvE,MAAM,CAAC,SAAS,EAAE,cAAc,EAAE,eAAe,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACrE,aAAa,CAAC,MAAM,CAAC;QACrB,yBAAyB,CAAC,QAAQ,CAAC;QACnC,kBAAkB,CAAC,QAAQ,CAAC;KAC7B,CAAC,CAAC;IAEH,iEAAiE;IACjE,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,IAAI,UAAkB,CAAC;IAEvB,IAAI,eAAe,EAAE,CAAC;QACpB,cAAc,GAAG,oBAAoB,CAAC,eAAe,CAAC,CAAC;QACvD,mBAAmB,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;QAC/C,MAAM,CAAC,KAAK,CACV;YACE,iBAAiB,EAAE,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM;YAC3D,qBAAqB,EAAE,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM;YACnE,WAAW,EAAE,SAAS,CAAC,MAAM;SAC9B,EACD,4BAA4B,CAC7B,CAAC;QACF,UAAU,GAAG,eAAe,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;QAC3E,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QAC1D,UAAU,GAAG,MAAM,kBAAkB,CAAC,eAAe,EAAE,cAAc,CAAC,CAAC;QACvE,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;SAAM,CAAC;QACN,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACjE,UAAU,GAAG,IAAI,CAAC;QAClB,MAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,aAAa,UAAU,CAAC,MAAM,mBAAmB,SAAS,CAAC,MAAM,SAAS,CAAC,CAAC;IACxF,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,qCAAqC,cAAc,EAAE,CAAC,CAAC;IACrE,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAC3C,CAAC;IAED,2EAA2E;IAC3E,cAAc;IACd,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,8BAA8B,CAAC,CAAC,CAAC,eAAe,CAAC;IAEpF,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,qCAAqC;QACrC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC;YAClC,KAAK,EAAE,eAAe,EAAE,CAAC,gBAAgB,CAAC;YAC1C,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;YACF,MAAM,EAAE,sBAAsB;YAC9B,MAAM,EAAE,qBAAqB,CAAC,UAAU,CAAC;YACzC,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,CAAC;SACd,CAAC,CAAC;QACH,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,GAAG,GAAG,KAAY,CAAC;QACzB,MAAM,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE,uBAAuB,CAAC,CAAC;QAEzD,MAAM,MAAM,GAAG,GAAG,EAAE,KAAK,EAAE,MAAM,IAAI,GAAG,EAAE,MAAM,CAAC;QACjD,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,8BAA8B,CAAC,CAAC;QACrE,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,EAAE,KAAK,EAAE,KAAK,IAAI,GAAG,EAAE,KAAK,CAAC;QAC9C,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,mCAAmC,CAAC,CAAC;QACzE,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;IAED,uCAAuC;IACvC,MAAM,eAAe,GAAG,sBAAsB,CAAC,MAAmB,CAAC,CAAC;IACpE,IAAI,IAAI,GAAG,sBAAsB,CAAC;QAChC,GAAG,eAAe;QAClB,UAAU,EAAE,eAAe,CAAC,UAAU,EAAE,cAAqC,EAAE,eAAe,CAAC;KACtF,CAAC,CAAC;IACb,IAAI,GAAG,iBAAiB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAC3C,MAAM,CAAC,IAAI,CAAC,6BAA6B,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;IAC7D,MAAM,CAAC,IAAI,CAAC,kCAAkC,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;IAE9E,gDAAgD;IAChD,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,CAAC,mBAAmB,GAAG,cAAqC,CAAC;IACnE,CAAC;IAED,6DAA6D;IAC7D,IAAI,cAAc,IAAI,IAAI,CAAC,UAAU,KAAK,kBAAkB,EAAE,CAAC;QAC7D,6BAA6B;QAC7B,IAAI,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,iBAAiB,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC;QAC7D,CAAC;QAED,mCAAmC;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClF,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YAEF,IAAI,oBAAoB,EAAE,CAAC;gBACzB,MAAM,KAAK,GAAG,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC;gBACnD,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;wBACzB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE;4BACnB,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;yBAC5B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;SAAM,IAAI,IAAI,CAAC,UAAU,KAAK,kBAAkB,EAAE,CAAC;QAClD,sEAAsE;QACtE,2DAA2D;QAC3D,MAAM,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;QAC9D,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,IAAI,CAAC,iBAAiB,GAAG,CAAC,EAAE,EAAE,EAAE,wBAAwB,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpF,CAAC;QACD,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YACvD,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YAEF,IAAI,oBAAoB,EAAE,CAAC;gBACxB,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;wBACtB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,gBAAgB,KAAK,GAAG,CAAC,EAAE;4BAC/B,MAAM,EAAE,UAAU,CAAC,KAAK,CAAC;yBAC1B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAElB,OAAO;QACL,IAAI;QACJ,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@digimakers/core",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.22",
|
|
4
4
|
"description": "Core library for Digimaker - docx to PDF conversion",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
},
|
|
23
23
|
"dependencies": {
|
|
24
24
|
"@ai-sdk/google": "^3.0.10",
|
|
25
|
-
"@digimakers/docling-cleaner": "^1.2.
|
|
25
|
+
"@digimakers/docling-cleaner": "^1.2.22",
|
|
26
26
|
"ai": "6.0.39",
|
|
27
27
|
"dotenv": "^17.2.3",
|
|
28
28
|
"express": "5.2.1",
|