@digimakers/core 0.3.21 → 0.3.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -16,6 +16,12 @@ from docling.document_converter import DocumentConverter
|
|
|
16
16
|
logging.getLogger('docling.backend.msword_backend').setLevel(logging.ERROR)
|
|
17
17
|
|
|
18
18
|
def main():
|
|
19
|
+
# Force UTF-8 stdout/stderr to avoid CP1252 mojibake on Windows.
|
|
20
|
+
if hasattr(sys.stdout, "reconfigure"):
|
|
21
|
+
sys.stdout.reconfigure(encoding="utf-8")
|
|
22
|
+
if hasattr(sys.stderr, "reconfigure"):
|
|
23
|
+
sys.stderr.reconfigure(encoding="utf-8")
|
|
24
|
+
|
|
19
25
|
if len(sys.argv) < 2:
|
|
20
26
|
print("Usage: python cleaner.py <path-to-docx>", file=sys.stderr)
|
|
21
27
|
sys.exit(1)
|
|
@@ -31,4 +37,4 @@ def main():
|
|
|
31
37
|
print(markdown)
|
|
32
38
|
|
|
33
39
|
if __name__ == "__main__":
|
|
34
|
-
main()
|
|
40
|
+
main()
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docling-runners.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"docling-runners.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAkHA,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAoBjF"}
|
|
@@ -57,6 +57,11 @@ function getDoclingMarkdownFromUv(filePath) {
|
|
|
57
57
|
return execFileSync('uv', ['run', 'python', 'cleaner.py', filePath], {
|
|
58
58
|
cwd: cleanerDir,
|
|
59
59
|
encoding: 'utf-8',
|
|
60
|
+
env: {
|
|
61
|
+
...process.env,
|
|
62
|
+
PYTHONUTF8: '1',
|
|
63
|
+
PYTHONIOENCODING: 'utf-8',
|
|
64
|
+
},
|
|
60
65
|
timeout: 120000,
|
|
61
66
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
62
67
|
});
|
|
@@ -72,6 +77,11 @@ export async function getDoclingMarkdown(filePath) {
|
|
|
72
77
|
try {
|
|
73
78
|
return execFileSync(binaryPath, [filePath], {
|
|
74
79
|
encoding: 'utf-8',
|
|
80
|
+
env: {
|
|
81
|
+
...process.env,
|
|
82
|
+
PYTHONUTF8: '1',
|
|
83
|
+
PYTHONIOENCODING: 'utf-8',
|
|
84
|
+
},
|
|
75
85
|
timeout: 120000,
|
|
76
86
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
77
87
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docling-runners.js","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAC3C,KAAK,UAAU,oBAAoB;IACjC,MAAM,WAAW,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;IAC1D,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,iBAAiB,CAAC;IAE5F,IAAI,CAAC;QACH,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;QAC7E,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;QAChD,IAAI,UAAU,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC;gBACH,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC,MAAM,EAAE;oBAAE,OAAO,UAAU,CAAC;YACvD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,oDAAoD,CAAC,CAAC;IACpF,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAC7B,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,MAAM,gBAAgB,GAAG,IAAI,CAAC,OAAO,CACnC,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,gBAAgB,CAAC;QAAE,OAAO,gBAAgB,CAAC;IAE1D,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAC5B,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,SAAS,CAAC;IAE5C,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAClC,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,eAAe,CAAC;QAAE,OAAO,eAAe,CAAC;IAExD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,iBAAiB,CAAC,CAAC;IACxE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,cAAc,CAAC;IAE/E,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,CAAC,CAAC;IACpF,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,aAAa,CAAC;IAE7E,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAgB;IAChD,MAAM,UAAU,GAAG,wBAAwB,EAAE,CAAC;IAC9C,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CACT,qFAAqF,CACtF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,EAAE;YACnE,GAAG,EAAE,UAAU;YACf,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,MAAM;YACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CACT,EAAE,GAAG,EAAE,KAAK,EAAE,EACd,sFAAsF,CACvF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAChD,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,EAAE;gBAC1C,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,+CAA+C,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAC;AAC5C,CAAC"}
|
|
1
|
+
{"version":3,"file":"docling-runners.js","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAC3C,KAAK,UAAU,oBAAoB;IACjC,MAAM,WAAW,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;IAC1D,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,iBAAiB,CAAC;IAE5F,IAAI,CAAC;QACH,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;QAC7E,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;QAChD,IAAI,UAAU,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC;gBACH,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC,MAAM,EAAE;oBAAE,OAAO,UAAU,CAAC;YACvD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,oDAAoD,CAAC,CAAC;IACpF,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAC7B,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,MAAM,gBAAgB,GAAG,IAAI,CAAC,OAAO,CACnC,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,gBAAgB,CAAC;QAAE,OAAO,gBAAgB,CAAC;IAE1D,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAC5B,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,SAAS,CAAC;IAE5C,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAClC,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,eAAe,CAAC;QAAE,OAAO,eAAe,CAAC;IAExD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,iBAAiB,CAAC,CAAC;IACxE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,cAAc,CAAC;IAE/E,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,CAAC,CAAC;IACpF,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,aAAa,CAAC;IAE7E,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAgB;IAChD,MAAM,UAAU,GAAG,wBAAwB,EAAE,CAAC;IAC9C,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CACT,qFAAqF,CACtF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,EAAE;YACnE,GAAG,EAAE,UAAU;YACf,QAAQ,EAAE,OAAO;YACjB,GAAG,EAAE;gBACH,GAAG,OAAO,CAAC,GAAG;gBACd,UAAU,EAAE,GAAG;gBACf,gBAAgB,EAAE,OAAO;aAC1B;YACD,OAAO,EAAE,MAAM;YACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CACT,EAAE,GAAG,EAAE,KAAK,EAAE,EACd,sFAAsF,CACvF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAChD,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,EAAE;gBAC1C,QAAQ,EAAE,OAAO;gBACjB,GAAG,EAAE;oBACH,GAAG,OAAO,CAAC,GAAG;oBACd,UAAU,EAAE,GAAG;oBACf,gBAAgB,EAAE,OAAO;iBAC1B;gBACD,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,+CAA+C,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAC;AAC5C,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@digimakers/core",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.22",
|
|
4
4
|
"description": "Core library for Digimaker - docx to PDF conversion",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
},
|
|
23
23
|
"dependencies": {
|
|
24
24
|
"@ai-sdk/google": "^3.0.10",
|
|
25
|
-
"@digimakers/docling-cleaner": "^1.2.
|
|
25
|
+
"@digimakers/docling-cleaner": "^1.2.22",
|
|
26
26
|
"ai": "6.0.39",
|
|
27
27
|
"dotenv": "^17.2.3",
|
|
28
28
|
"express": "5.2.1",
|