@twin.org/document-management-service 0.0.1-next.10 → 0.0.1-next.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -482,6 +482,11 @@ class DocumentManagementService {
482
482
  * @internal
483
483
  */
484
484
  _attestationComponent;
485
+ /**
486
+ * The component used for data processing.
487
+ * @internal
488
+ */
489
+ _dataProcessingComponent;
485
490
  /**
486
491
  * Create a new instance of DocumentManagementService.
487
492
  * @param options The options for the service.
@@ -490,6 +495,7 @@ class DocumentManagementService {
490
495
  this._auditableItemGraphComponent = core.ComponentFactory.get(options?.auditableItemGraphComponentType ?? "auditable-item-graph");
491
496
  this._blobStorageComponent = core.ComponentFactory.get(options?.blobStorageComponentType ?? "blob-storage");
492
497
  this._attestationComponent = core.ComponentFactory.get(options?.attestationComponentType ?? "attestation");
498
+ this._dataProcessingComponent = core.ComponentFactory.get(options?.dataProcessingComponentType ?? "data-processing");
493
499
  standardsSchemaOrg.SchemaOrgDataTypes.registerRedirects();
494
500
  }
495
501
  /**
@@ -685,6 +691,8 @@ class DocumentManagementService {
685
691
  * @param options.includeBlobStorageData Flag to include the blob storage data for the document, defaults to false.
686
692
  * @param options.includeAttestation Flag to include the attestation information for the document, defaults to false.
687
693
  * @param options.includeRemoved Flag to include deleted documents, defaults to false.
694
+ * @param options.extractRuleGroupId If provided will extract data from the document using the specified rule group id.
695
+ * @param options.extractMimeType By default extraction will auto-detect the mime type of the document; this can be used to override the detection.
688
696
  * @param cursor The cursor to get the next chunk of revisions.
689
697
  * @param pageSize Page size of items to return, defaults to 1 so only most recent is returned.
690
698
  * @param userIdentity The identity to perform the auditable item graph operation with.
@@ -918,7 +926,8 @@ class DocumentManagementService {
918
926
  * @param options.includeBlobStorageMetadata Flag to include the blob storage metadata for the document, defaults to false.
919
927
  * @param options.includeBlobStorageData Flag to include the blob storage data for the document, defaults to false.
920
928
  * @param options.includeAttestation Flag to include the attestation information for the document, defaults to false.
921
- * @param options.includeRemoved Flag to include deleted documents, defaults to false.
929
+ * @param options.extractRuleGroupId If provided will extract data from the document using the specified rule group id.
930
+ * @param options.extractMimeType By default extraction will auto-detect the mime type of the document; this can be used to override the detection.
922
931
  * @param cursor The cursor to get the next chunk of revisions.
923
932
  * @param pageSize Page size of items to return, defaults to 1 so only most recent is returned.
924
933
  * @param userIdentity The identity to perform the auditable item graph operation with.
@@ -948,15 +957,31 @@ class DocumentManagementService {
948
957
  const includeBlobStorageMetadata = options?.includeBlobStorageMetadata ?? false;
949
958
  const includeBlobStorageData = options?.includeBlobStorageData ?? false;
950
959
  const includeAttestation = options?.includeAttestation ?? false;
960
+ const extractData = core.Is.stringValue(options?.extractRuleGroupId);
951
961
  for (let i = 0; i < slicedResources.length; i++) {
952
962
  const document = slicedResources[i].resourceObject;
953
963
  if (core.Is.object(document)) {
954
964
  docList.documents.push(document);
955
- if (includeBlobStorageMetadata || includeBlobStorageData) {
956
- const blobEntry = await this._blobStorageComponent.get(document.blobStorageId, includeBlobStorageData, userIdentity, nodeIdentity);
957
- document.blobStorageEntry = blobEntry;
958
- if (!docList["@context"].includes(blobStorageModels.BlobStorageContexts.ContextRoot)) {
959
- docList["@context"].push(blobStorageModels.BlobStorageContexts.ContextRoot);
965
+ const blobRequired = includeBlobStorageMetadata || includeBlobStorageData;
966
+ if (blobRequired || extractData) {
967
+ const blobEntry = await this._blobStorageComponent.get(document.blobStorageId, includeBlobStorageData || extractData, userIdentity, nodeIdentity);
968
+ if (blobRequired) {
969
+ document.blobStorageEntry = blobEntry;
970
+ if (!docList["@context"].includes(blobStorageModels.BlobStorageContexts.ContextRoot)) {
971
+ docList["@context"].push(blobStorageModels.BlobStorageContexts.ContextRoot);
972
+ }
973
+ }
974
+ if (core.Is.stringValue(options?.extractRuleGroupId) && core.Is.stringValue(blobEntry.blob)) {
975
+ const binaryBlob = core.Converter.base64ToBytes(blobEntry.blob);
976
+ document.extractedData = await this._dataProcessingComponent.extract(options.extractRuleGroupId, binaryBlob, undefined, options?.extractMimeType);
977
+ }
978
+ // If we have the blob data due to extraction but we weren't asked for it
979
+ // then we remove it from the document
980
+ if (!blobRequired) {
981
+ delete document.blobStorageEntry;
982
+ }
983
+ else if (!includeBlobStorageData) {
984
+ delete document.blobStorageEntry?.blob;
960
985
  }
961
986
  }
962
987
  if (includeAttestation && core.Is.stringValue(document.attestationId)) {
@@ -480,6 +480,11 @@ class DocumentManagementService {
480
480
  * @internal
481
481
  */
482
482
  _attestationComponent;
483
+ /**
484
+ * The component used for data processing.
485
+ * @internal
486
+ */
487
+ _dataProcessingComponent;
483
488
  /**
484
489
  * Create a new instance of DocumentManagementService.
485
490
  * @param options The options for the service.
@@ -488,6 +493,7 @@ class DocumentManagementService {
488
493
  this._auditableItemGraphComponent = ComponentFactory.get(options?.auditableItemGraphComponentType ?? "auditable-item-graph");
489
494
  this._blobStorageComponent = ComponentFactory.get(options?.blobStorageComponentType ?? "blob-storage");
490
495
  this._attestationComponent = ComponentFactory.get(options?.attestationComponentType ?? "attestation");
496
+ this._dataProcessingComponent = ComponentFactory.get(options?.dataProcessingComponentType ?? "data-processing");
491
497
  SchemaOrgDataTypes.registerRedirects();
492
498
  }
493
499
  /**
@@ -683,6 +689,8 @@ class DocumentManagementService {
683
689
  * @param options.includeBlobStorageData Flag to include the blob storage data for the document, defaults to false.
684
690
  * @param options.includeAttestation Flag to include the attestation information for the document, defaults to false.
685
691
  * @param options.includeRemoved Flag to include deleted documents, defaults to false.
692
+ * @param options.extractRuleGroupId If provided will extract data from the document using the specified rule group id.
693
+ * @param options.extractMimeType By default extraction will auto-detect the mime type of the document; this can be used to override the detection.
686
694
  * @param cursor The cursor to get the next chunk of revisions.
687
695
  * @param pageSize Page size of items to return, defaults to 1 so only most recent is returned.
688
696
  * @param userIdentity The identity to perform the auditable item graph operation with.
@@ -916,7 +924,8 @@ class DocumentManagementService {
916
924
  * @param options.includeBlobStorageMetadata Flag to include the blob storage metadata for the document, defaults to false.
917
925
  * @param options.includeBlobStorageData Flag to include the blob storage data for the document, defaults to false.
918
926
  * @param options.includeAttestation Flag to include the attestation information for the document, defaults to false.
919
- * @param options.includeRemoved Flag to include deleted documents, defaults to false.
927
+ * @param options.extractRuleGroupId If provided will extract data from the document using the specified rule group id.
928
+ * @param options.extractMimeType By default extraction will auto-detect the mime type of the document; this can be used to override the detection.
920
929
  * @param cursor The cursor to get the next chunk of revisions.
921
930
  * @param pageSize Page size of items to return, defaults to 1 so only most recent is returned.
922
931
  * @param userIdentity The identity to perform the auditable item graph operation with.
@@ -946,15 +955,31 @@ class DocumentManagementService {
946
955
  const includeBlobStorageMetadata = options?.includeBlobStorageMetadata ?? false;
947
956
  const includeBlobStorageData = options?.includeBlobStorageData ?? false;
948
957
  const includeAttestation = options?.includeAttestation ?? false;
958
+ const extractData = Is.stringValue(options?.extractRuleGroupId);
949
959
  for (let i = 0; i < slicedResources.length; i++) {
950
960
  const document = slicedResources[i].resourceObject;
951
961
  if (Is.object(document)) {
952
962
  docList.documents.push(document);
953
- if (includeBlobStorageMetadata || includeBlobStorageData) {
954
- const blobEntry = await this._blobStorageComponent.get(document.blobStorageId, includeBlobStorageData, userIdentity, nodeIdentity);
955
- document.blobStorageEntry = blobEntry;
956
- if (!docList["@context"].includes(BlobStorageContexts.ContextRoot)) {
957
- docList["@context"].push(BlobStorageContexts.ContextRoot);
963
+ const blobRequired = includeBlobStorageMetadata || includeBlobStorageData;
964
+ if (blobRequired || extractData) {
965
+ const blobEntry = await this._blobStorageComponent.get(document.blobStorageId, includeBlobStorageData || extractData, userIdentity, nodeIdentity);
966
+ if (blobRequired) {
967
+ document.blobStorageEntry = blobEntry;
968
+ if (!docList["@context"].includes(BlobStorageContexts.ContextRoot)) {
969
+ docList["@context"].push(BlobStorageContexts.ContextRoot);
970
+ }
971
+ }
972
+ if (Is.stringValue(options?.extractRuleGroupId) && Is.stringValue(blobEntry.blob)) {
973
+ const binaryBlob = Converter.base64ToBytes(blobEntry.blob);
974
+ document.extractedData = await this._dataProcessingComponent.extract(options.extractRuleGroupId, binaryBlob, undefined, options?.extractMimeType);
975
+ }
976
+ // If we have the blob data due to extraction but we weren't asked for it
977
+ // then we remove it from the document
978
+ if (!blobRequired) {
979
+ delete document.blobStorageEntry;
980
+ }
981
+ else if (!includeBlobStorageData) {
982
+ delete document.blobStorageEntry?.blob;
958
983
  }
959
984
  }
960
985
  if (includeAttestation && Is.stringValue(document.attestationId)) {
@@ -72,6 +72,8 @@ export declare class DocumentManagementService implements IDocumentManagementCom
72
72
  * @param options.includeBlobStorageData Flag to include the blob storage data for the document, defaults to false.
73
73
  * @param options.includeAttestation Flag to include the attestation information for the document, defaults to false.
74
74
  * @param options.includeRemoved Flag to include deleted documents, defaults to false.
75
+ * @param options.extractRuleGroupId If provided will extract data from the document using the specified rule group id.
76
+ * @param options.extractMimeType By default extraction will auto-detect the mime type of the document; this can be used to override the detection.
75
77
  * @param cursor The cursor to get the next chunk of revisions.
76
78
  * @param pageSize Page size of items to return, defaults to 1 so only most recent is returned.
77
79
  * @param userIdentity The identity to perform the auditable item graph operation with.
@@ -83,6 +85,8 @@ export declare class DocumentManagementService implements IDocumentManagementCom
83
85
  includeBlobStorageData?: boolean;
84
86
  includeAttestation?: boolean;
85
87
  includeRemoved?: boolean;
88
+ extractRuleGroupId?: string;
89
+ extractMimeType?: string;
86
90
  }, cursor?: string, pageSize?: number, userIdentity?: string, nodeIdentity?: string): Promise<IDocumentList>;
87
91
  /**
88
92
  * Remove an auditable item graph vertex using its id.
@@ -18,6 +18,11 @@ export interface IDocumentManagementServiceConstructorOptions {
18
18
  * @default attestation
19
19
  */
20
20
  attestationComponentType?: string;
21
+ /**
22
+ * The type of the data processing component.
23
+ * @default data-processing
24
+ */
25
+ dataProcessingComponentType?: string;
21
26
  /**
22
27
  * The configuration for the service.
23
28
  */
package/docs/changelog.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # @twin.org/document-management-service - Changelog
2
2
 
3
+ ## [0.0.1-next.11](https://github.com/twinfoundation/document-management/compare/document-management-service-v0.0.1-next.10...document-management-service-v0.0.1-next.11) (2025-04-28)
4
+
5
+
6
+ ### Features
7
+
8
+ * document get can perform extraction ([#6](https://github.com/twinfoundation/document-management/issues/6)) ([5ce6d37](https://github.com/twinfoundation/document-management/commit/5ce6d37432ad271ca5783f422846f4be98ec2215))
9
+
10
+
11
+ ### Dependencies
12
+
13
+ * The following workspace dependencies were updated
14
+ * dependencies
15
+ * @twin.org/document-management-models bumped from 0.0.1-next.10 to 0.0.1-next.11
16
+
3
17
  ## [0.0.1-next.10](https://github.com/twinfoundation/document-management/compare/document-management-service-v0.0.1-next.9...document-management-service-v0.0.1-next.10) (2025-04-25)
4
18
 
5
19
 
@@ -477,6 +477,24 @@
477
477
  "type": "boolean"
478
478
  }
479
479
  },
480
+ {
481
+ "name": "extractRuleGroupId",
482
+ "description": "If provided will extract data from the document using the specified rule group id.",
483
+ "in": "query",
484
+ "required": false,
485
+ "schema": {
486
+ "type": "string"
487
+ }
488
+ },
489
+ {
490
+ "name": "extractMimeType",
491
+ "description": "By default extraction will auto-detect the mime type of the document; this can be used to override the detection.",
492
+ "in": "query",
493
+ "required": false,
494
+ "schema": {
495
+ "type": "string"
496
+ }
497
+ },
480
498
  {
481
499
  "name": "pageSize",
482
500
  "description": "Page size of items to return, defaults to 1 so only most recent is returned.",
@@ -1467,6 +1485,9 @@
1467
1485
  "blobStorageEntry": {
1468
1486
  "$ref": "#/components/schemas/BlobStorageEntry"
1469
1487
  },
1488
+ "extractedData": {
1489
+ "description": "The data extracted from the document using data extraction services."
1490
+ },
1470
1491
  "attestationId": {
1471
1492
  "type": "string",
1472
1493
  "description": "The attestation for the document if one was created."
@@ -240,6 +240,18 @@ Flag to include the attestation information for the document, defaults to false.
240
240
 
241
241
  Flag to include deleted documents, defaults to false.
242
242
 
243
+ ###### extractRuleGroupId?
244
+
245
+ `string`
246
+
247
+ If provided will extract data from the document using the specified rule group id.
248
+
249
+ ###### extractMimeType?
250
+
251
+ `string`
252
+
253
+ By default extraction will auto-detect the mime type of the document; this can be used to override the detection.
254
+
243
255
  ##### cursor?
244
256
 
245
257
  `string`
@@ -46,6 +46,20 @@ attestation
46
46
 
47
47
  ***
48
48
 
49
+ ### dataProcessingComponentType?
50
+
51
+ > `optional` **dataProcessingComponentType**: `string`
52
+
53
+ The type of the data processing component.
54
+
55
+ #### Default
56
+
57
+ ```ts
58
+ data-processing
59
+ ```
60
+
61
+ ***
62
+
49
63
  ### config?
50
64
 
51
65
  > `optional` **config**: [`IDocumentManagementServiceConfig`](IDocumentManagementServiceConfig.md)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@twin.org/document-management-service",
3
- "version": "0.0.1-next.10",
3
+ "version": "0.0.1-next.11",
4
4
  "description": "Document management contract implementation and REST endpoint definitions",
5
5
  "repository": {
6
6
  "type": "git",
@@ -21,7 +21,8 @@
21
21
  "@twin.org/core": "next",
22
22
  "@twin.org/crypto": "next",
23
23
  "@twin.org/data-json-ld": "next",
24
- "@twin.org/document-management-models": "0.0.1-next.10",
24
+ "@twin.org/data-processing-models": "next",
25
+ "@twin.org/document-management-models": "0.0.1-next.11",
25
26
  "@twin.org/entity": "next",
26
27
  "@twin.org/entity-storage-models": "next",
27
28
  "@twin.org/nameof": "next",