@digimakers/docling-cleaner 1.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/index.d.ts +1 -0
  2. package/index.js +179 -0
  3. package/package.json +19 -0
package/index.d.ts ADDED
@@ -0,0 +1 @@
/**
 * Locates the docling-cleaner binary for the current platform, downloading it
 * into the per-user cache when it is not already there.
 *
 * @returns A promise that resolves with the filesystem path of the binary.
 */
export declare function ensureDoclingCleaner(): Promise<string>;
package/index.js ADDED
@@ -0,0 +1,179 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const os = require('os');
4
+ const https = require('https');
5
+ const { execFileSync } = require('child_process');
6
+
// Supported `${process.platform}-${process.arch}` combinations. Every key maps
// to itself; the value is the tag used for the cache sub-directory and for the
// ASSET_NAMES lookup.
const PLATFORM_TAGS = ['linux-x64', 'darwin-x64', 'darwin-arm64', 'win32-x64'].reduce(
  (tags, tag) => ({ ...tags, [tag]: tag }),
  {}
);
13
+
// Release asset file name for each platform tag. The Linux asset is a gzipped
// tarball that gets extracted after download; the other assets are used as
// downloaded.
const ASSET_NAMES = (() => {
  const prefix = 'docling-cleaner';
  return {
    'linux-x64': `${prefix}-linux-x64.tar.gz`,
    'darwin-x64': `${prefix}-darwin-x64`,
    'darwin-arm64': `${prefix}-darwin-arm64`,
    'win32-x64': `${prefix}-win32-x64.exe`,
  };
})();
20
+
/**
 * Reads the `version` field from the package.json located next to this module.
 *
 * @returns {string} The `version` value stored in package.json (expected to be
 *   a string).
 * @throws If package.json cannot be read or is not valid JSON.
 */
function getPackageVersion() {
  const manifestFile = path.join(__dirname, 'package.json');
  const { version } = JSON.parse(fs.readFileSync(manifestFile, 'utf8'));
  return version;
}
26
+
/**
 * Looks up the current `${process.platform}-${process.arch}` pair in
 * PLATFORM_TAGS.
 *
 * @returns {string|null} The matching platform tag, or `null` when the current
 *   platform/architecture pair is not listed.
 */
function getPlatformTag() {
  const key = [process.platform, process.arch].join('-');
  const known = PLATFORM_TAGS[key];
  return known ? known : null;
}
31
+
/**
 * Builds the per-user cache directory for a given package version.
 *
 * On Windows the directory lives under `%LOCALAPPDATA%`, falling back to
 * `<home>/AppData/Local` when that variable is unset or empty. On every other
 * platform it lives under `<home>/.cache`.
 *
 * @param {string} version - Version segment appended to the cache path.
 * @returns {string} `<base>/digimaker/docling-cleaner/<version>`.
 */
function getCacheDir(version) {
  if (process.platform === 'win32') {
    // The fallback must use path.join; the original called the non-existent
    // path.json and threw whenever LOCALAPPDATA was missing.
    const base = process.env.LOCALAPPDATA || path.join(os.homedir(), 'AppData', 'Local');
    return path.join(base, 'digimaker', 'docling-cleaner', version);
  }

  return path.join(os.homedir(), '.cache', 'digimaker', 'docling-cleaner', version);
}
40
+
/**
 * Synchronously creates `dir`, including any missing parent directories.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDirSync(dir) {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
44
+
/**
 * Extracts a gzipped tarball into `destination` by running the system `tar`
 * command synchronously.
 *
 * `tar` output is forwarded to the parent process only when
 * DEBUG_DOCLING_CLEANER is "1" or CI is "true"; otherwise it is discarded.
 *
 * @param {string} tarPath - Path of the `.tar.gz` archive.
 * @param {string} destination - Directory to extract into; created if missing.
 * @throws If `tar` cannot be started or exits with a non-zero status.
 */
function extractTarball(tarPath, destination) {
  // Make sure the target directory exists before tar is pointed at it.
  fs.mkdirSync(destination, { recursive: true });

  const verbose = process.env.DEBUG_DOCLING_CLEANER === '1' || process.env.CI === 'true';
  const tarArgs = ['-xzf', tarPath, '-C', destination];
  execFileSync('tar', tarArgs, { stdio: verbose ? 'inherit' : 'ignore' });
}
51
+
/**
 * Prints `message` with console.log, but only when the DEBUG_DOCLING_CLEANER
 * environment variable is exactly "1".
 *
 * @param {*} message - Value handed to console.log unchanged.
 */
function debugLog(message) {
  const enabled = process.env.DEBUG_DOCLING_CLEANER === '1';
  if (!enabled) {
    return;
  }
  // eslint-disable-next-line no-console
  console.log(message);
}
58
+
/**
 * Downloads `url` over HTTPS into the file at `destination`.
 *
 * Redirect responses (3xx with a `Location` header) are followed, up to five
 * hops. Any response status other than 200 is treated as a failure. On every
 * failure path the write stream is closed and the partially written file is
 * removed before the promise rejects.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} destination - File path to write the response body to.
 * @param {number} [redirects=0] - Number of redirects already followed; used
 *   by the recursive call and normally left at its default.
 * @returns {Promise<void>} Resolves once the file has been fully written and
 *   closed.
 */
function downloadFile(url, destination, redirects = 0) {
  return new Promise((resolve, reject) => {
    const file = fs.createWriteStream(destination);
    // Several events can report the same failure; only the first one counts.
    let settled = false;

    // Closes the write stream, deletes the destination file, then calls `done`.
    const discardFile = (done) => {
      let removed = false;
      const remove = () => {
        if (removed) {
          return;
        }
        removed = true;
        // Unlink errors are ignored on purpose: the file may never have been created.
        fs.unlink(destination, () => done());
      };
      if (file.destroyed) {
        remove();
        return;
      }
      file.once('close', remove);
      file.once('error', remove);
      file.destroy();
    };

    const fail = (error) => {
      if (settled) {
        return;
      }
      settled = true;
      discardFile(() => reject(error));
    };

    file.on('error', fail);

    const handleResponse = (response) => {
      const status = response.statusCode || 0;

      if (status >= 300 && status < 400 && response.headers.location) {
        response.resume();
        if (redirects >= 5) {
          fail(new Error('Too many redirects'));
          return;
        }
        let nextUrl;
        try {
          nextUrl = new URL(response.headers.location, url).toString();
        } catch (error) {
          // A malformed Location header must reject, not crash the process.
          fail(error);
          return;
        }
        if (settled) {
          return;
        }
        // Hand the outcome over to the request for the redirect target.
        settled = true;
        discardFile(() => {
          downloadFile(nextUrl, destination, redirects + 1).then(resolve, reject);
        });
        return;
      }

      if (status !== 200) {
        response.resume();
        fail(new Error(`Unexpected status code ${status}`));
        return;
      }

      response.on('error', (error) => {
        response.unpipe(file);
        fail(error);
      });
      response.pipe(file);
      file.on('finish', () => {
        file.close((error) => {
          if (error) {
            fail(error);
            return;
          }
          if (settled) {
            return;
          }
          settled = true;
          resolve();
        });
      });
    };

    let request;
    try {
      request = https.get(url, handleResponse);
    } catch (error) {
      // https.get throws synchronously for e.g. a non-https URL; clean up the
      // already-opened write stream instead of leaking it.
      fail(error);
      return;
    }
    request.on('error', fail);
  });
}
107
+
/**
 * Returns the path of a docling-cleaner binary for the current platform,
 * downloading it into the per-user cache on first use.
 *
 * Resolution order:
 *   1. `DOCLING_CLEANER_PATH`, when set and the path exists.
 *   2. A previously cached binary for this package version and platform.
 *   3. A fresh download from `DOCLING_CLEANER_BASE_URL` (or the GitHub release
 *      matching this package version). The Linux asset is a tarball that is
 *      extracted after download; other assets are used as downloaded.
 *
 * @returns {Promise<string>} Path of the binary.
 * @throws {Error} If the platform is unsupported, or if download/extraction
 *   fails; in the latter case the original error is attached as `cause`.
 */
async function ensureDoclingCleaner() {
  const overridePath = process.env.DOCLING_CLEANER_PATH;
  if (overridePath && fs.existsSync(overridePath)) {
    return overridePath;
  }

  const platformTag = getPlatformTag();
  if (!platformTag) {
    throw new Error(
      `Unsupported platform for docling-cleaner: ${process.platform}-${process.arch}`
    );
  }

  const version = getPackageVersion();
  const assetName = ASSET_NAMES[platformTag];
  const cacheDir = path.join(getCacheDir(version), platformTag);
  const cachedBinaryPath = path.join(cacheDir, assetName);
  // The Linux tarball is expected to unpack to docling-cleaner/docling-cleaner.
  const linuxExtractedBinary = path.join(cacheDir, 'docling-cleaner', 'docling-cleaner');

  if (platformTag === 'linux-x64' && fs.existsSync(linuxExtractedBinary)) {
    fs.chmodSync(linuxExtractedBinary, 0o755);
    return linuxExtractedBinary;
  }

  if (platformTag !== 'linux-x64' && fs.existsSync(cachedBinaryPath)) {
    if (process.platform !== 'win32') {
      fs.chmodSync(cachedBinaryPath, 0o755);
    }
    return cachedBinaryPath;
  }

  ensureDirSync(cacheDir);

  const baseUrl =
    process.env.DOCLING_CLEANER_BASE_URL ||
    `https://github.com/jenul-ferdinand/digimaker/releases/download/v${version}/`;
  const normalizedBaseUrl = baseUrl.endsWith('/') ? baseUrl : `${baseUrl}/`;
  const downloadUrl = `${normalizedBaseUrl}${assetName}`;
  // Download to a temporary name so a partial file is never mistaken for a
  // complete cached binary.
  const tempPath = `${cachedBinaryPath}.download`;

  try {
    debugLog(`docling-cleaner download: ${downloadUrl}`);
    debugLog(`docling-cleaner cache dir: ${cacheDir}`);
    await downloadFile(downloadUrl, tempPath);
    if (platformTag === 'linux-x64') {
      extractTarball(tempPath, cacheDir);
      fs.unlinkSync(tempPath);
      if (fs.existsSync(linuxExtractedBinary)) {
        debugLog(`docling-cleaner extracted: ${linuxExtractedBinary}`);
        fs.chmodSync(linuxExtractedBinary, 0o755);
        return linuxExtractedBinary;
      }
      throw new Error('Extracted Linux binary not found after tar extraction');
    }

    fs.renameSync(tempPath, cachedBinaryPath);
    if (process.platform !== 'win32') {
      fs.chmodSync(cachedBinaryPath, 0o755);
    }
    return cachedBinaryPath;
  } catch (error) {
    try {
      fs.unlinkSync(tempPath);
    } catch {
      // Ignore cleanup errors.
    }
    const message = error && error.message ? error.message : String(error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Failed to download docling-cleaner from ${downloadUrl}: ${message}`, {
      cause: error,
    });
  }
}
178
+
179
+ module.exports = { ensureDoclingCleaner };
package/package.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "@digimakers/docling-cleaner",
3
+ "version": "1.2.20",
4
+ "description": "Docling cleaner binary downloader",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/jenul-ferdinand/digimaker.git",
9
+ "directory": "packages/docling-cleaner"
10
+ },
11
+ "main": "index.js",
12
+ "files": [
13
+ "index.js",
14
+ "index.d.ts"
15
+ ],
16
+ "publishConfig": {
17
+ "access": "public"
18
+ }
19
+ }