plugin-document-parser 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/client.d.ts +2 -0
  2. package/client.js +1 -0
  3. package/dist/client/01b8a5798a872638.js +10 -0
  4. package/dist/client/022be20abc96fdb4.js +10 -0
  5. package/dist/client/12e97e7a84d900e0.js +10 -0
  6. package/dist/client/index.js +10 -0
  7. package/dist/externalVersion.js +20 -0
  8. package/dist/index.js +48 -0
  9. package/dist/locale/en-US.json +54 -0
  10. package/dist/locale/vi-VN.json +54 -0
  11. package/dist/node_modules/form-data/License +19 -0
  12. package/dist/node_modules/form-data/index.d.ts +62 -0
  13. package/dist/node_modules/form-data/lib/browser.js +4 -0
  14. package/dist/node_modules/form-data/lib/form_data.js +14 -0
  15. package/dist/node_modules/form-data/lib/populate.js +10 -0
  16. package/dist/node_modules/form-data/package.json +1 -0
  17. package/dist/server/collections/doc-parser-providers.js +137 -0
  18. package/dist/server/collections/doc-parser-settings.js +85 -0
  19. package/dist/server/index.js +51 -0
  20. package/dist/server/plugin.js +181 -0
  21. package/dist/server/resource/docParserProviders.js +91 -0
  22. package/dist/server/services/builtin-ai-handler.js +63 -0
  23. package/dist/server/services/external-ocr-client.js +189 -0
  24. package/dist/server/services/internal-parser-registry.js +82 -0
  25. package/dist/server/services/parse-router.js +273 -0
  26. package/package.json +33 -0
  27. package/server.d.ts +2 -0
  28. package/server.js +1 -0
  29. package/src/client/components/GlobalSettings.tsx +151 -0
  30. package/src/client/components/ProviderForm.tsx +266 -0
  31. package/src/client/components/ProviderList.tsx +193 -0
  32. package/src/client/components/SettingsPage.tsx +43 -0
  33. package/src/client/index.tsx +2 -0
  34. package/src/client/locale.ts +12 -0
  35. package/src/client/plugin.tsx +34 -0
  36. package/src/index.ts +2 -0
  37. package/src/locale/en-US.json +54 -0
  38. package/src/locale/vi-VN.json +54 -0
  39. package/src/server/collections/doc-parser-providers.ts +107 -0
  40. package/src/server/collections/doc-parser-settings.ts +59 -0
  41. package/src/server/index.ts +10 -0
  42. package/src/server/plugin.ts +172 -0
  43. package/src/server/resource/docParserProviders.ts +72 -0
  44. package/src/server/services/builtin-ai-handler.ts +49 -0
  45. package/src/server/services/external-ocr-client.ts +233 -0
  46. package/src/server/services/internal-parser-registry.ts +126 -0
  47. package/src/server/services/parse-router.ts +357 -0
@@ -0,0 +1,10 @@
1
+ 'use strict';
2
+
3
+ // populates missing values
4
+ module.exports = function (dst, src) {
5
+ Object.keys(src).forEach(function (prop) {
6
+ dst[prop] = dst[prop] || src[prop]; // eslint-disable-line no-param-reassign
7
+ });
8
+
9
+ return dst;
10
+ };
@@ -0,0 +1 @@
1
+ {"author":"Felix Geisendörfer <felix@debuggable.com> (http://debuggable.com/)","name":"form-data","description":"A library to create readable \"multipart/form-data\" streams. Can be used to submit forms and file uploads to other web applications.","version":"4.0.5","repository":{"type":"git","url":"git://github.com/form-data/form-data.git"},"main":"./lib/form_data","browser":"./lib/browser","typings":"./index.d.ts","scripts":{"pretest":"npm run lint","pretests-only":"rimraf coverage test/tmp","tests-only":"istanbul cover test/run.js","posttests-only":"istanbul report lcov text","test":"npm run tests-only","posttest":"npx npm@'>=10.2' audit --production","lint":"eslint --ext=js,mjs .","report":"istanbul report lcov text","ci-lint":"is-node-modern 8 && npm run lint || is-node-not-modern 8","ci-test":"npm run tests-only && npm run browser && npm run report","predebug":"rimraf coverage test/tmp","debug":"verbose=1 ./test/run.js","browser":"browserify -t browserify-istanbul test/run-browser.js | obake --coverage","check":"istanbul check-coverage coverage/coverage*.json","files":"pkgfiles --sort=name","get-version":"node -e \"console.log(require('./package.json').version)\"","update-readme":"sed -i.bak 's/\\/master\\.svg/\\/v'$(npm --silent run get-version)'.svg/g' README.md","postupdate-readme":"mv README.md.bak READ.ME.md.bak","restore-readme":"mv READ.ME.md.bak README.md","prepublish":"not-in-publish || npm run prepublishOnly","prepack":"npm run update-readme","postpack":"npm run restore-readme","version":"auto-changelog && git add CHANGELOG.md","postversion":"auto-changelog && git add CHANGELOG.md && git commit --no-edit --amend && git tag -f \"v$(node -e \"console.log(require('./package.json').version)\")\""},"engines":{"node":">= 
6"},"dependencies":{"asynckit":"^0.4.0","combined-stream":"^1.0.8","es-set-tostringtag":"^2.1.0","hasown":"^2.0.2","mime-types":"^2.1.12"},"devDependencies":{"@ljharb/eslint-config":"^21.4.0","auto-changelog":"^2.5.0","browserify":"^13.3.0","browserify-istanbul":"^2.0.0","coveralls":"^3.1.1","cross-spawn":"^6.0.6","eslint":"^8.57.1","fake":"^0.2.2","far":"^0.0.7","formidable":"^1.2.6","in-publish":"^2.0.1","is-node-modern":"^1.0.0","istanbul":"^0.4.5","js-randomness-predictor":"^1.5.5","obake":"^0.1.2","pkgfiles":"^2.3.2","pre-commit":"^1.2.2","puppeteer":"^1.20.0","request":"~2.87.0","rimraf":"^2.7.1","semver":"^6.3.1","tape":"^5.9.0"},"license":"MIT","auto-changelog":{"output":"CHANGELOG.md","template":"keepachangelog","unreleased":false,"commitLimit":false,"backfillLimit":false,"hideCredit":true},"_lastModified":"2026-04-03T18:52:21.070Z"}
@@ -0,0 +1,137 @@
1
+ /**
2
+ * This file is part of the NocoBase (R) project.
3
+ * Copyright (c) 2020-2024 NocoBase Co., Ltd.
4
+ * Authors: NocoBase Team.
5
+ *
6
+ * This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
7
+ * For more information, please refer to: https://www.nocobase.com/agreement.
8
+ */
9
+
10
// CommonJS interop helpers used by the export wiring further down this module.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/** Define every entry of `all` on `target` as an enumerable getter. */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters, skipping keys
 * `to` already owns and the single key named by `except`. Returns `to`.
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/** Build a fresh object flagged `__esModule` that mirrors `mod`. */
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
27
+ var doc_parser_providers_exports = {};
28
+ __export(doc_parser_providers_exports, {
29
+ default: () => doc_parser_providers_default
30
+ });
31
+ module.exports = __toCommonJS(doc_parser_providers_exports);
32
+ var import_database = require("@nocobase/database");
33
+ var doc_parser_providers_default = (0, import_database.defineCollection)({ // Schema of the "docParserProviders" collection: endpoint, auth, request/response mapping and scope per provider
34
+ name: "docParserProviders",
35
+ title: "Document Parser Providers",
36
+ fields: [
37
+ {
38
+ name: "title",
39
+ type: "string"
40
+ },
41
+ {
42
+ name: "enabled",
43
+ type: "boolean",
44
+ defaultValue: true
45
+ },
46
+ // ── Endpoint ──────────────────────────────────────────────────────────────
47
+ {
48
+ name: "apiEndpoint",
49
+ type: "string",
50
+ comment: "Full URL, e.g. https://ocr.example.com/v1/parse"
51
+ },
52
+ // ── Authentication ────────────────────────────────────────────────────────
53
+ {
54
+ name: "authType",
55
+ type: "string",
56
+ defaultValue: "bearer",
57
+ comment: "'bearer' | 'api-key-header' | 'basic' | 'custom-headers' | 'none'"
58
+ },
59
+ {
60
+ // Intended to be protected at rest via the NocoBase `password` field type. NOTE(review): confirm this type stores a recoverable value; if it hashes one-way, the value read back cannot be replayed as an API credential.
61
+ name: "apiKey",
62
+ type: "password",
63
+ allowNull: true,
64
+ comment: "Used for bearer / api-key-header auth"
65
+ },
66
+ {
67
+ name: "authConfig",
68
+ type: "json",
69
+ defaultValue: {},
70
+ comment: JSON.stringify({ // column comment lists the expected keys; the inline notes below are source-only and are not part of the stringified value
71
+ headerName: "X-Api-Key",
72
+ // for api-key-header
73
+ username: "",
74
+ // for basic auth
75
+ password: "",
76
+ // for basic auth. NOTE(review): unlike apiKey this lives in a plain `json` column; confirm that is acceptable for a secret
77
+ customHeaders: {}
78
+ // for custom-headers: { "X-Foo": "bar" }
79
+ })
80
+ },
81
+ // ── Request format ────────────────────────────────────────────────────────
82
+ {
83
+ name: "requestFormat",
84
+ type: "string",
85
+ defaultValue: "multipart",
86
+ comment: "'multipart' | 'json-base64' | 'url'"
87
+ },
88
+ {
89
+ name: "requestConfig",
90
+ type: "json",
91
+ defaultValue: {},
92
+ comment: JSON.stringify({ // column comment lists the expected keys per request format; inline notes below are source-only
93
+ // multipart
94
+ fileFieldName: "file",
95
+ filenameFieldName: "",
96
+ // optional extra field for filename
97
+ mimetypeFieldName: "",
98
+ // optional extra field for mimetype
99
+ extraFields: {},
100
+ // extra form fields
101
+ // json-base64
102
+ base64FieldPath: "file",
103
+ // e.g. "document.content"
104
+ filenameFieldPath: "filename",
105
+ mimetypeFieldPath: "mimetype",
106
+ extraBody: {},
107
+ // url (send download URL instead of file bytes)
108
+ urlFieldPath: "url"
109
+ })
110
+ },
111
+ // ── Response extraction ───────────────────────────────────────────────────
112
+ {
113
+ name: "responseTextPath",
114
+ type: "string",
115
+ defaultValue: "text",
116
+ comment: "Dot-path into the JSON response body, e.g. 'data.text' or 'result.pages[0].content'"
117
+ },
118
+ // ── Scope ─────────────────────────────────────────────────────────────────
119
+ {
120
+ name: "supportedMimetypes",
121
+ type: "json",
122
+ defaultValue: [],
123
+ comment: 'Empty = handle everything routed to external. e.g. ["application/pdf"]'
124
+ },
125
+ {
126
+ name: "timeout",
127
+ type: "integer",
128
+ defaultValue: 6e4, // 60000 ms
129
+ comment: "HTTP request timeout in milliseconds"
130
+ },
131
+ {
132
+ name: "options",
133
+ type: "json",
134
+ defaultValue: {}
135
+ }
136
+ ]
137
+ });
@@ -0,0 +1,85 @@
1
+ /**
2
+ * This file is part of the NocoBase (R) project.
3
+ * Copyright (c) 2020-2024 NocoBase Co., Ltd.
4
+ * Authors: NocoBase Team.
5
+ *
6
+ * This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
7
+ * For more information, please refer to: https://www.nocobase.com/agreement.
8
+ */
9
+
10
// CommonJS interop helpers used by the export wiring further down this module.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/** Define every entry of `all` on `target` as an enumerable getter. */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters, skipping keys
 * `to` already owns and the single key named by `except`. Returns `to`.
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/** Build a fresh object flagged `__esModule` that mirrors `mod`. */
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
27
+ var doc_parser_settings_exports = {};
28
+ __export(doc_parser_settings_exports, {
29
+ default: () => doc_parser_settings_default
30
+ });
31
+ module.exports = __toCommonJS(doc_parser_settings_exports);
32
+ var import_database = require("@nocobase/database");
33
+ var doc_parser_settings_default = (0, import_database.defineCollection)({ // Schema of the "docParserSettings" collection: parsing mode, active provider and fallback toggles
34
+ name: "docParserSettings",
35
+ title: "Document Parser Settings",
36
+ fields: [
37
+ {
38
+ name: "mode",
39
+ type: "string",
40
+ defaultValue: "default",
41
+ comment: "'default' | 'internal' | 'external'"
42
+ },
43
+ {
44
+ // Id of the docParserProviders record that is the active external provider; stored as a plain bigInt (no association is declared in this schema)
45
+ name: "activeProviderId",
46
+ type: "bigInt",
47
+ allowNull: true
48
+ },
49
+ {
50
+ // When internal/external parsing fails, fall back to the default provider logic
51
+ name: "fallbackToDefault",
52
+ type: "boolean",
53
+ defaultValue: true
54
+ },
55
+ {
56
+ // Toggle (on by default): images are passed through to the default provider since they don't need OCR. NOTE(review): confirm how the router treats `false`.
57
+ name: "imagePassThrough",
58
+ type: "boolean",
59
+ defaultValue: true
60
+ },
61
+ {
62
+ // Optional: restrict which extnames this plugin handles (empty = all non-image)
63
+ name: "includedExtnames",
64
+ type: "json",
65
+ defaultValue: [],
66
+ comment: 'e.g. [".pdf", ".docx"] \u2014 empty means all non-image files'
67
+ },
68
+ {
69
+ name: "options",
70
+ type: "json",
71
+ defaultValue: {}
72
+ },
73
+ {
74
+ /**
75
+ * When true and plugin-docpixie is active:
76
+ * - Trigger docpixie:processDocument (async indexing)
77
+ * - Return a metadata reference block instead of full text
78
+ * - LLM is instructed to call docpixie:query tool for retrieval
79
+ */
80
+ name: "useDocpixie",
81
+ type: "boolean",
82
+ defaultValue: false // off by default
83
+ }
84
+ ]
85
+ });
@@ -0,0 +1,51 @@
1
+ /**
2
+ * This file is part of the NocoBase (R) project.
3
+ * Copyright (c) 2020-2024 NocoBase Co., Ltd.
4
+ * Authors: NocoBase Team.
5
+ *
6
+ * This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
7
+ * For more information, please refer to: https://www.nocobase.com/agreement.
8
+ */
9
+
10
// CommonJS/ESM interop helpers used by the import and export wiring further down this module.
const __create = Object.create;
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __getProtoOf = Object.getPrototypeOf;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/** Define every entry of `all` on `target` as an enumerable getter. */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters, skipping keys
 * `to` already owns and the single key named by `except`. Returns `to`.
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/**
 * Wrap a required module in an ESM-shaped namespace object whose prototype is
 * the module's prototype.
 */
const __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

/** Build a fresh object flagged `__esModule` that mirrors `mod`. */
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
37
+ var server_exports = {};
38
+ __export(server_exports, {
39
+ InternalParserRegistry: () => import_internal_parser_registry.InternalParserRegistry,
40
+ PluginDocumentParserServer: () => import_plugin2.PluginDocumentParserServer,
41
+ default: () => import_plugin.default
42
+ });
43
+ module.exports = __toCommonJS(server_exports);
44
+ var import_plugin = __toESM(require("./plugin"));
45
+ var import_plugin2 = require("./plugin");
46
+ var import_internal_parser_registry = require("./services/internal-parser-registry");
47
+ // Annotate the CommonJS export names for ESM import in node:
48
+ 0 && (module.exports = {
49
+ InternalParserRegistry,
50
+ PluginDocumentParserServer
51
+ });
@@ -0,0 +1,181 @@
1
+ /**
2
+ * This file is part of the NocoBase (R) project.
3
+ * Copyright (c) 2020-2024 NocoBase Co., Ltd.
4
+ * Authors: NocoBase Team.
5
+ *
6
+ * This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
7
+ * For more information, please refer to: https://www.nocobase.com/agreement.
8
+ */
9
+
10
// CommonJS/ESM interop helpers used by the import and export wiring further down this module.
const __create = Object.create;
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __getProtoOf = Object.getPrototypeOf;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/** Define every entry of `all` on `target` as an enumerable getter. */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters, skipping keys
 * `to` already owns and the single key named by `except`. Returns `to`.
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/**
 * Wrap a required module in an ESM-shaped namespace object whose prototype is
 * the module's prototype.
 */
const __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

/** Build a fresh object flagged `__esModule` that mirrors `mod`. */
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
37
+ var plugin_exports = {};
38
+ __export(plugin_exports, {
39
+ PluginDocumentParserServer: () => PluginDocumentParserServer,
40
+ default: () => plugin_default
41
+ });
42
+ module.exports = __toCommonJS(plugin_exports);
43
+ var import_server = require("@nocobase/server");
44
+ var import_path = require("path");
45
+ var import_axios = __toESM(require("axios"));
46
+ var import_internal_parser_registry = require("./services/internal-parser-registry");
47
+ var import_builtin_ai_handler = require("./services/builtin-ai-handler");
48
+ var import_parse_router = require("./services/parse-router");
49
+ var import_docParserProviders = require("./resource/docParserProviders");
50
/**
 * Server side of the document-parser plugin.
 *
 * On load it imports the plugin collections, builds the ParseRouter, wraps the
 * LLM provider classes known to plugin-ai so that `parseAttachment` goes
 * through the router, and defines the `docParserProviders` /
 * `docParserSettings` resource actions.
 */
class PluginDocumentParserServer extends import_server.Plugin {
  /**
   * Public registry — other plugins register their format handlers here:
   *
   *   const docParser = this.pm.get(PluginDocumentParserServer);
   *   docParser.internalParserRegistry.register({ name, supports, parse });
   */
  internalParserRegistry = new import_internal_parser_registry.InternalParserRegistry();
  parseRouter;

  // ── Lifecycle ─────────────────────────────────────────────────────────────

  /** Register the built-in handler that delegates to plugin-ai's document loaders. */
  async beforeLoad() {
    this.internalParserRegistry.register(
      new import_builtin_ai_handler.BuiltinAIDocumentHandler(() => {
        // Resolved lazily on every call; yields undefined when plugin-ai is absent.
        const aiPlugin = this.pm.get("@nocobase/plugin-ai");
        return aiPlugin?.documentLoaders;
      })
    );
  }

  /** Import collections, build the router, patch the AI manager and expose resources. */
  async load() {
    await this.importCollections((0, import_path.resolve)(__dirname, "collections"));
    this.parseRouter = new import_parse_router.ParseRouter(
      () => this.db.getRepository("docParserSettings"),
      () => this.db.getRepository("docParserProviders"),
      this.internalParserRegistry,
      this.fetchFileBuffer.bind(this),
      () => {
        // Only hand the docpixie plugin over when it exposes a `service`.
        const docpixie = this.pm.get("@nocobase/plugin-docpixie");
        return docpixie?.service ? docpixie : null;
      }
    );
    this.wrapAIManager();
    this.app.resourceManager.define({
      name: "docParserProviders",
      actions: {
        testConnection: import_docParserProviders.testConnection
      }
    });
    this.app.resourceManager.define({
      name: "docParserSettings",
      actions: {
        get: import_docParserProviders.getSettings,
        save: import_docParserProviders.saveSettings
      }
    });
    // NOTE(review): these grants let ANY logged-in user create/update/destroy
    // providers (records that hold API credentials and outbound endpoints) and
    // trigger `testConnection` requests from the server. Confirm this is
    // intended; an admin-only ACL snippet may be the safer default.
    this.app.acl.allow("docParserProviders", ["list", "create", "update", "destroy", "get", "testConnection"], "loggedIn");
    this.app.acl.allow("docParserSettings", ["get", "save"], "loggedIn");
  }

  // ── AIManager patching ────────────────────────────────────────────────────

  /**
   * Wrap AIManager.registerLLMProvider so that every provider class — including
   * those registered AFTER this plugin loads (e.g. plugin-custom-llm) — gets
   * its `parseAttachment` intercepted.
   *
   * Additionally, iterate providers already registered (plugin-ai built-ins:
   * OpenAI, Anthropic, etc.) and wrap them immediately.
   *
   * Metas without a provider class are passed through untouched so that a
   * malformed registration from another plugin is not turned into a crash here.
   */
  wrapAIManager() {
    const aiPlugin = this.pm.get("@nocobase/plugin-ai");
    const aiManager = aiPlugin?.aiManager;
    if (!aiManager) {
      this.log.warn("[DocumentParser] plugin-ai not found \u2014 parseAttachment interception skipped");
      return;
    }
    const originalRegister = aiManager.registerLLMProvider.bind(aiManager);
    const wrapMeta = (meta) =>
      typeof meta?.provider === "function"
        ? { ...meta, provider: this.wrapProviderClass(meta.provider) }
        : meta;
    aiManager.registerLLMProvider = (name, meta) => originalRegister(name, wrapMeta(meta));
    let wrappedCount = 0;
    for (const [name, meta] of aiManager.llmProviders.entries()) {
      const wrapped = wrapMeta(meta);
      if (wrapped !== meta) {
        // Replacing the value of an existing key is safe while iterating a Map.
        aiManager.llmProviders.set(name, wrapped);
        wrappedCount += 1;
      }
    }
    this.log.info(`[DocumentParser] Wrapped ${wrappedCount} LLM providers`);
  }

  /**
   * Create a subclass of `OriginalClass` that overrides `parseAttachment`
   * to go through our router first. Uses `super.parseAttachment` as the default
   * parser fallback — this correctly handles providers that already override the
   * method (e.g. CustomLLMProvider, AnthropicProvider…).
   *
   * @param {Function} OriginalClass provider class to wrap
   * @returns {Function} the wrapping subclass, or `OriginalClass` itself when it
   *   is not a class/function (extending it would throw a TypeError)
   */
  wrapProviderClass(OriginalClass) {
    if (typeof OriginalClass !== "function") {
      return OriginalClass;
    }
    const self = this;
    return class extends OriginalClass {
      async parseAttachment(ctx, attachment) {
        return self.parseRouter.route(
          ctx,
          attachment,
          () => super.parseAttachment(ctx, attachment)
        );
      }
    };
  }

  // ── File buffer helper ────────────────────────────────────────────────────

  /**
   * Fetch the raw bytes of an attachment using the file-manager plugin,
   * returning both the buffer and the resolved URL.
   *
   * @param {object} ctx request context; `ctx.get` (when present) supplies the
   *   `referer` and `user-agent` headers forwarded on HTTP downloads
   * @param {object} attachment attachment record passed to `getFileURL`
   * @returns {Promise<{buffer: Buffer, url: string}>} file bytes plus the
   *   percent-decoded URL
   * @throws {Error} when file-manager is unavailable or a local path resolves
   *   outside the storage root
   */
  async fetchFileBuffer(ctx, attachment) {
    const fileManager = this.app.pm.get("file-manager");
    if (!fileManager) {
      throw new Error("[DocumentParser] file-manager plugin is not available \u2014 cannot resolve attachment URL");
    }
    const rawUrl = await fileManager.getFileURL(attachment);
    let url;
    try {
      url = decodeURIComponent(rawUrl);
    } catch (err) {
      // A malformed escape sequence must not abort parsing; use the URL as given.
      if (!(err instanceof URIError)) {
        throw err;
      }
      url = rawUrl;
    }
    if (url.startsWith("http://") || url.startsWith("https://")) {
      const referer = ctx.get?.("referer") || "";
      const ua = ctx.get?.("user-agent") || "";
      // Download the URL exactly as produced by file-manager. Requesting the
      // decoded form turns encoded reserved characters (%23, %3F, %2B in signed
      // query strings, ...) back into URL syntax and breaks the request.
      const response = await import_axios.default.get(rawUrl, {
        responseType: "arraybuffer",
        timeout: 6e4,
        headers: { referer, "User-Agent": ua }
      });
      return { buffer: Buffer.from(response.data), url };
    }
    const { readFile } = require("fs/promises");
    let localPath = url;
    const appPublicPath = (process.env.APP_PUBLIC_PATH || "/").replace(/\/+$/, "");
    if (appPublicPath && localPath.startsWith(appPublicPath + "/")) {
      localPath = localPath.slice(appPublicPath.length);
    }
    // Local files are resolved relative to the process working directory and
    // must stay inside it.
    const storageRoot = (0, import_path.resolve)(process.cwd());
    const absPath = (0, import_path.resolve)(storageRoot, localPath.replace(/^\//, ""));
    if (!absPath.startsWith(storageRoot + import_path.sep) && absPath !== storageRoot) {
      throw new Error(`[DocumentParser] Attachment path escapes storage root: ${localPath}`);
    }
    const buffer = await readFile(absPath);
    return { buffer, url };
  }
}
177
+ var plugin_default = PluginDocumentParserServer;
178
+ // Annotate the CommonJS export names for ESM import in node:
179
+ 0 && (module.exports = {
180
+ PluginDocumentParserServer
181
+ });
@@ -0,0 +1,91 @@
1
+ /**
2
+ * This file is part of the NocoBase (R) project.
3
+ * Copyright (c) 2020-2024 NocoBase Co., Ltd.
4
+ * Authors: NocoBase Team.
5
+ *
6
+ * This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
7
+ * For more information, please refer to: https://www.nocobase.com/agreement.
8
+ */
9
+
10
// CommonJS interop helpers used by the export wiring further down this module.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/** Define every entry of `all` on `target` as an enumerable getter. */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters, skipping keys
 * `to` already owns and the single key named by `except`. Returns `to`.
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/** Build a fresh object flagged `__esModule` that mirrors `mod`. */
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
27
+ var docParserProviders_exports = {};
28
+ __export(docParserProviders_exports, {
29
+ getSettings: () => getSettings,
30
+ saveSettings: () => saveSettings,
31
+ testConnection: () => testConnection
32
+ });
33
+ module.exports = __toCommonJS(docParserProviders_exports);
34
+ var import_external_ocr_client = require("../services/external-ocr-client");
35
/**
 * Resource action: look up the provider identified by `filterByTk` and run a
 * connection test with its stored endpoint and credentials.
 *
 * Responds 404 when the provider does not exist; otherwise puts the test
 * result on `ctx.body` and continues the middleware chain.
 */
async function testConnection(ctx, next) {
  const providerId = ctx.action.params.filterByTk;
  const provider = await ctx.db.getRepository("docParserProviders").findById(providerId);
  if (!provider) {
    ctx.throw(404, "Provider not found");
    return;
  }
  const runProbe = import_external_ocr_client.testOcrProviderConnection;
  ctx.body = await runProbe({
    apiEndpoint: provider.get("apiEndpoint"),
    authType: provider.get("authType"),
    apiKey: provider.get("apiKey"),
    authConfig: provider.get("authConfig") ?? {},
    // cap at 15s for test; 10s when the record has no timeout
    timeout: Math.min(provider.get("timeout") ?? 1e4, 15e3)
  });
  await next();
}
54
/**
 * Resource action: return the settings record, creating it with default
 * values first when the collection is still empty.
 */
async function getSettings(ctx, next) {
  const settingsRepo = ctx.db.getRepository("docParserSettings");
  const existing = await settingsRepo.findOne({});
  if (existing) {
    ctx.body = existing;
  } else {
    // First access: seed the row with defaults.
    const defaults = {
      mode: "default",
      fallbackToDefault: true,
      imagePassThrough: true,
      includedExtnames: []
    };
    ctx.body = await settingsRepo.create({ values: defaults });
  }
  await next();
}
70
/**
 * Resource action: create or update the settings record from the request
 * body, then ask the plugin's parse router (when available) to drop its
 * cached settings.
 *
 * The body must be a JSON object; anything else is rejected with 400.
 * Record-identity and audit columns (`id`, `createdAt`, `updatedAt`,
 * `createdById`, `updatedById`) are removed from the payload so a client
 * echoing back a full record — or a crafted request — cannot rewrite them.
 *
 * @param {object} ctx request context (`ctx.request.body`, `ctx.db`, `ctx.app`)
 * @param {Function} next next middleware
 */
async function saveSettings(ctx, next) {
  const payload = ctx.request.body;
  if (payload === null || typeof payload !== "object" || Array.isArray(payload)) {
    ctx.throw(400, "Settings payload must be a JSON object");
    return;
  }
  const protectedKeys = new Set(["id", "createdAt", "updatedAt", "createdById", "updatedById"]);
  const values = Object.fromEntries(
    Object.entries(payload).filter(([key]) => !protectedKeys.has(key))
  );

  const repo = ctx.db.getRepository("docParserSettings");
  let record = await repo.findOne({});
  if (!record) {
    record = await repo.create({ values });
  } else {
    const id = record.get("id");
    await repo.update({ filter: { id }, values });
    // Re-read the same row so the response reflects the stored state.
    record = await repo.findOne({ filter: { id } });
  }

  // The router may cache settings; invalidate when the plugin and hook exist.
  const plugin = ctx.app.pm.get("@nocobase/plugin-document-parser");
  plugin?.parseRouter?.invalidateSettingsCache?.();

  ctx.body = record;
  await next();
}
86
+ // Annotate the CommonJS export names for ESM import in node:
87
+ 0 && (module.exports = {
88
+ getSettings,
89
+ saveSettings,
90
+ testConnection
91
+ });
@@ -0,0 +1,63 @@
1
+ /**
2
+ * This file is part of the NocoBase (R) project.
3
+ * Copyright (c) 2020-2024 NocoBase Co., Ltd.
4
+ * Authors: NocoBase Team.
5
+ *
6
+ * This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
7
+ * For more information, please refer to: https://www.nocobase.com/agreement.
8
+ */
9
+
10
// CommonJS interop helpers used by the export wiring further down this module.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/** Define every entry of `all` on `target` as an enumerable getter. */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters, skipping keys
 * `to` already owns and the single key named by `except`. Returns `to`.
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/** Build a fresh object flagged `__esModule` that mirrors `mod`. */
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
27
+ var builtin_ai_handler_exports = {};
28
+ __export(builtin_ai_handler_exports, {
29
+ BuiltinAIDocumentHandler: () => BuiltinAIDocumentHandler
30
+ });
31
+ module.exports = __toCommonJS(builtin_ai_handler_exports);
32
// Lower-cased extensions (with leading dot) this handler claims.
const AI_SUPPORTED_EXTNAMES = /* @__PURE__ */ new Set([".pdf", ".ppt", ".pptx", ".doc", ".docx", ".txt"]);

/**
 * Internal parser handler that delegates to a document-loaders object obtained
 * lazily through the `getDocumentLoaders` callback.
 */
class BuiltinAIDocumentHandler {
  /**
   * @param {Function} getDocumentLoaders callback returning the loaders object
   *   (expected to expose `cached.load(attachment)`), or a nullish value when
   *   the loaders are not available
   */
  constructor(getDocumentLoaders) {
    this.getDocumentLoaders = getDocumentLoaders;
  }

  name = "builtin-ai-document-loader";

  /**
   * @param {object} attachment attachment with `extname`, `filename` or `name`
   * @returns {boolean} true when the attachment's extension is in the supported set
   */
  supports(attachment) {
    return AI_SUPPORTED_EXTNAMES.has(this.resolveExtname(attachment));
  }

  /**
   * Load the attachment's text through the cached document loader.
   *
   * Returns `{ text: "", handled: false }` instead of throwing when the
   * loaders are unavailable (the callback returned nothing) or when the loader
   * reports the file as unsupported, so the caller can move on to its next
   * option.
   *
   * @param {object} attachment attachment to parse
   * @param {object} _ctx unused
   * @returns {Promise<{text: string, handled: boolean}>}
   */
  async parse(attachment, _ctx) {
    const loaders = this.getDocumentLoaders();
    if (typeof loaders?.cached?.load !== "function") {
      return { text: "", handled: false };
    }
    const result = await loaders.cached.load(attachment);
    if (!result?.supported) {
      return { text: "", handled: false };
    }
    return {
      text: result.text ?? "",
      handled: true
    };
  }

  /**
   * Lower-cased extension of the attachment: `extname` when set, otherwise the
   * suffix of `filename` / `name` from its last dot; "" when there is none.
   */
  resolveExtname(attachment) {
    if (attachment.extname) {
      return attachment.extname.toLowerCase();
    }
    const fileName = attachment.filename ?? attachment.name ?? "";
    const dotIndex = fileName.lastIndexOf(".");
    return dotIndex >= 0 ? fileName.slice(dotIndex).toLowerCase() : "";
  }
}
60
+ // Annotate the CommonJS export names for ESM import in node:
61
+ 0 && (module.exports = {
62
+ BuiltinAIDocumentHandler
63
+ });