@azure/ai-form-recognizer 5.0.0-alpha.20250226.1 → 5.0.0-alpha.20250228.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +292 -330
  2. package/dist/browser/documentAnalysisClient.d.ts +220 -131
  3. package/dist/browser/documentAnalysisClient.d.ts.map +1 -1
  4. package/dist/browser/documentAnalysisClient.js +57 -35
  5. package/dist/browser/documentAnalysisClient.js.map +1 -1
  6. package/dist/browser/documentModelAdministrationClient.d.ts +291 -123
  7. package/dist/browser/documentModelAdministrationClient.d.ts.map +1 -1
  8. package/dist/browser/documentModelAdministrationClient.js +184 -82
  9. package/dist/browser/documentModelAdministrationClient.js.map +1 -1
  10. package/dist/browser/lro/util/delayMs.d.ts +1 -1
  11. package/dist/browser/lro/util/delayMs.js +1 -1
  12. package/dist/browser/lro/util/delayMs.js.map +1 -1
  13. package/dist/commonjs/documentAnalysisClient.d.ts +220 -131
  14. package/dist/commonjs/documentAnalysisClient.d.ts.map +1 -1
  15. package/dist/commonjs/documentAnalysisClient.js +57 -35
  16. package/dist/commonjs/documentAnalysisClient.js.map +1 -1
  17. package/dist/commonjs/documentModelAdministrationClient.d.ts +291 -123
  18. package/dist/commonjs/documentModelAdministrationClient.d.ts.map +1 -1
  19. package/dist/commonjs/documentModelAdministrationClient.js +184 -82
  20. package/dist/commonjs/documentModelAdministrationClient.js.map +1 -1
  21. package/dist/commonjs/lro/util/delayMs.d.ts +1 -1
  22. package/dist/commonjs/lro/util/delayMs.js +1 -1
  23. package/dist/commonjs/lro/util/delayMs.js.map +1 -1
  24. package/dist/esm/documentAnalysisClient.d.ts +220 -131
  25. package/dist/esm/documentAnalysisClient.d.ts.map +1 -1
  26. package/dist/esm/documentAnalysisClient.js +57 -35
  27. package/dist/esm/documentAnalysisClient.js.map +1 -1
  28. package/dist/esm/documentModelAdministrationClient.d.ts +291 -123
  29. package/dist/esm/documentModelAdministrationClient.d.ts.map +1 -1
  30. package/dist/esm/documentModelAdministrationClient.js +184 -82
  31. package/dist/esm/documentModelAdministrationClient.js.map +1 -1
  32. package/dist/esm/lro/util/delayMs.d.ts +1 -1
  33. package/dist/esm/lro/util/delayMs.js +1 -1
  34. package/dist/esm/lro/util/delayMs.js.map +1 -1
  35. package/dist/react-native/documentAnalysisClient.d.ts +220 -131
  36. package/dist/react-native/documentAnalysisClient.d.ts.map +1 -1
  37. package/dist/react-native/documentAnalysisClient.js +57 -35
  38. package/dist/react-native/documentAnalysisClient.js.map +1 -1
  39. package/dist/react-native/documentModelAdministrationClient.d.ts +291 -123
  40. package/dist/react-native/documentModelAdministrationClient.d.ts.map +1 -1
  41. package/dist/react-native/documentModelAdministrationClient.js +184 -82
  42. package/dist/react-native/documentModelAdministrationClient.js.map +1 -1
  43. package/dist/react-native/lro/util/delayMs.d.ts +1 -1
  44. package/dist/react-native/lro/util/delayMs.js +1 -1
  45. package/dist/react-native/lro/util/delayMs.js.map +1 -1
  46. package/package.json +19 -18
package/README.md CHANGED
@@ -29,11 +29,10 @@ npm install @azure/ai-form-recognizer

  ## Getting started

- ```javascript
- const { DocumentAnalysisClient } = require("@azure/ai-form-recognizer");
- const { DefaultAzureCredential } = require("@azure/identity");
-
- const fs = require("fs");
+ ```ts snippet:ReadmeSampleCreateClient_Node
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentAnalysisClient } from "@azure/ai-form-recognizer";
+ import { createReadStream } from "node:fs";

  const credential = new DefaultAzureCredential();
  const client = new DocumentAnalysisClient(
@@ -42,10 +41,10 @@ const client = new DocumentAnalysisClient(
  );

  // Document Intelligence supports many different types of files.
- const file = fs.createReadStream("path/to/file.jpg");
+ const file = createReadStream("path/to/file.jpg");
  const poller = await client.beginAnalyzeDocument("<model ID>", file);

- const { pages, tables, styles, keyValuePairs, entities, documents } = await poller.pollUntilDone();
+ const { pages, tables, styles, keyValuePairs, documents } = await poller.pollUntilDone();
  ```

  ### Currently supported environments
@@ -108,10 +107,14 @@ az cognitiveservices account keys list --resource-group <your-resource-group-nam

  Once you have an API key and endpoint, you can use it as follows:

- ```js
- const { DocumentAnalysisClient, AzureKeyCredential } = require("@azure/ai-form-recognizer");
+ ```ts snippet:ReadmeSampleCreateClient_KeyCredential
+ import { AzureKeyCredential, DocumentAnalysisClient } from "@azure/ai-form-recognizer";

- const client = new DocumentAnalysisClient("<endpoint>", new AzureKeyCredential("<API key>"));
+ const credential = new AzureKeyCredential("<API key>");
+ const client = new DocumentAnalysisClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );
  ```

  #### Use Azure Active Directory
@@ -126,11 +129,15 @@ To authenticate using a service principal, you will also need to [register an AA

  Set the values of the client ID, tenant ID, and client secret of the AAD application as environment variables: `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_CLIENT_SECRET`.

- ```js
- const { DocumentAnalysisClient } = require("@azure/ai-form-recognizer");
- const { DefaultAzureCredential } = require("@azure/identity");
+ ```ts snippet:ReadmeSampleCreateClient_TokenCredential
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentAnalysisClient } from "@azure/ai-form-recognizer";

- const client = new DocumentAnalysisClient("<endpoint>", new DefaultAzureCredential());
+ const credential = new DefaultAzureCredential();
+ const client = new DocumentAnalysisClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );
  ```

  ## Key concepts
@@ -179,56 +186,53 @@ The following section provides several JavaScript code snippets illustrating com

  The `beginAnalyzeDocument` method can extract fields and table data from documents. Analysis may use either a custom model, trained with your own data, or a prebuilt model provided by the service (see _[Use Prebuilt Models](#use-prebuilt-models)_ below). A custom model is tailored to your own documents, so it should only be used with documents of the same structure as one of the document types in the model (there may be multiple, such as in a composed model).

- ```javascript
- const { DocumentAnalysisClient, AzureKeyCredential } = require("@azure/ai-form-recognizer");
-
- const fs = require("fs");
-
- async function main() {
-   const endpoint = "<cognitive services endpoint>";
-   const apiKey = "<api key>";
-   const modelId = "<model id>";
-   const path = "<path to a document>";
-
-   const readStream = fs.createReadStream(path);
-
-   const client = new DocumentAnalysisClient(endpoint, new AzureKeyCredential(apiKey));
-   const poller = await client.beginAnalyzeDocument(modelId, readStream, {
-     onProgress: ({ status }) => {
-       console.log(`status: ${status}`);
-     },
-   });
-
-   // There are more fields than just these three
-   const { documents, pages, tables } = await poller.pollUntilDone();
-
-   console.log("Documents:");
-   for (const document of documents || []) {
-     console.log(`Type: ${document.docType}`);
-     console.log("Fields:");
-     for (const [name, field] of Object.entries(document.fields)) {
-       console.log(
-         `Field ${name} has value '${field.value}' with a confidence score of ${field.confidence}`,
-       );
-     }
-   }
-   console.log("Pages:");
-   for (const page of pages || []) {
-     console.log(`Page number: ${page.pageNumber} (${page.width}x${page.height} ${page.unit})`);
-   }
+ ```ts snippet:ReadmeSampleAnalyzeDocumentWithModelId
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentAnalysisClient } from "@azure/ai-form-recognizer";
+ import { createReadStream } from "node:fs";

-   console.log("Tables:");
-   for (const table of tables || []) {
-     console.log(`- Table (${table.columnCount}x${table.rowCount})`);
-     for (const cell of table.cells) {
-       console.log(` - cell (${cell.rowIndex},${cell.columnIndex}) "${cell.content}"`);
-     }
+ const credential = new DefaultAzureCredential();
+ const client = new DocumentAnalysisClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );
+
+ const modelId = "<model id>";
+ const path = "<path to a document>";
+ const readStream = createReadStream(path);
+
+ const poller = await client.beginAnalyzeDocument(modelId, readStream, {
+   onProgress: ({ status }) => {
+     console.log(`status: ${status}`);
+   },
+ });
+
+ // There are more fields than just these three
+ const { documents, pages, tables } = await poller.pollUntilDone();
+
+ console.log("Documents:");
+ for (const document of documents || []) {
+   console.log(`Type: ${document.docType}`);
+   console.log("Fields:");
+   for (const [name, field] of Object.entries(document.fields)) {
+     console.log(
+       `Field ${name} has content '${field.content}' with a confidence score of ${field.confidence}`,
+     );
    }
  }

- main().catch((err) => {
-   console.error("The sample encountered an error:", err);
- });
+ console.log("Pages:");
+ for (const page of pages || []) {
+   console.log(`Page number: ${page.pageNumber} (${page.width}x${page.height} ${page.unit})`);
+ }
+
+ console.log("Tables:");
+ for (const table of tables || []) {
+   console.log(`- Table (${table.columnCount}x${table.rowCount})`);
+   for (const cell of table.cells) {
+     console.log(` - cell (${cell.rowIndex},${cell.columnIndex}) "${cell.content}"`);
+   }
+ }
  ```

  #### Analyze a document from a URL
@@ -243,59 +247,52 @@ Example `DocumentModel` objects for the current service API version (`2022-08-31

  Since the main benefit of `DocumentModel`-based analysis is stronger TypeScript type constraints, the following sample is written in TypeScript using ECMAScript module syntax:

- ```typescript
- import { DocumentAnalysisClient, AzureKeyCredential } from "@azure/ai-form-recognizer";
-
- // Copy the file from the above-linked sample directory so that it can be imported in this module
- import { PrebuiltReceiptModel } from "./prebuilt/prebuilt-receipt";
+ ```ts snippet:ReadmeSamplePrebuiltReceipt
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentAnalysisClient } from "@azure/ai-form-recognizer";
+ import { createReadStream } from "node:fs";
+ import { PrebuiltReceiptModel } from "../samples-dev/prebuilt/prebuilt-receipt.js";

- import fs from "fs";
-
- async function main() {
-   const endpoint = "<cognitive services endpoint>";
-   const apiKey = "<api key>";
-   const path = "<path to your receipt document>"; // pdf/jpeg/png/tiff formats
-
-   const readStream = fs.createReadStream(path);
+ const credential = new DefaultAzureCredential();
+ const client = new DocumentAnalysisClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );

-   const client = new DocumentAnalysisClient(endpoint, new AzureKeyCredential(apiKey));
+ const path = "<path to a document>";
+ const readStream = createReadStream(path);

-   // The PrebuiltReceiptModel `DocumentModel` instance encodes both the model ID and a stronger return type for the operation
-   const poller = await client.beginAnalyzeDocument(PrebuiltReceiptModel, readStream, {
-     onProgress: ({ status }) => {
-       console.log(`status: ${status}`);
-     },
-   });
+ // The PrebuiltReceiptModel `DocumentModel` instance encodes both the model ID and a stronger return type for the operation
+ const poller = await client.beginAnalyzeDocument(PrebuiltReceiptModel, readStream, {
+   onProgress: ({ status }) => {
+     console.log(`status: ${status}`);
+   },
+ });

-   const {
-     documents: [receiptDocument],
-   } = await poller.pollUntilDone();
+ const {
+   documents: [receiptDocument],
+ } = await poller.pollUntilDone();

-   // The fields of the document constitute the extracted receipt data.
-   const receipt = receiptDocument.fields;
+ // The fields of the document constitute the extracted receipt data.
+ const receipt = receiptDocument.fields;

-   if (receipt === undefined) {
-     throw new Error("Expected at least one receipt in analysis result.");
-   }
+ if (receipt === undefined) {
+   throw new Error("Expected at least one receipt in analysis result.");
+ }

-   console.log(`Receipt data (${receiptDocument.docType})`);
-   console.log(" Merchant Name:", receipt.merchantName?.value);
+ console.log(`Receipt data (${receiptDocument.docType})`);
+ console.log(" Merchant Name:", receipt.merchantName?.value);

-   // The items of the receipt are an example of a `DocumentArrayValue`
-   if (receipt.items !== undefined) {
-     console.log("Items:");
-     for (const { properties: item } of receipt.items.values) {
-       console.log("- Description:", item.description?.value);
-       console.log(" Total Price:", item.totalPrice?.value);
-     }
+ // The items of the receipt are an example of a `DocumentArrayValue`
+ if (receipt.items !== undefined) {
+   console.log("Items:");
+   for (const { properties: item } of receipt.items.values) {
+     console.log("- Description:", item.description?.value);
+     console.log(" Total Price:", item.totalPrice?.value);
    }
-
-   console.log(" Total:", receipt.total?.value);
  }

- main().catch((err) => {
-   console.error("The sample encountered an error:", err);
- });
+ console.log(" Total:", receipt.total?.value);
  ```

  Alternatively, as mentioned above, instead of using `PrebuiltReceiptModel`, which produces the stronger return type, the prebuilt receipt's model ID ("prebuilt-receipt") can be used, but the document fields will not be strongly typed in TypeScript, and the field names will generally be in "PascalCase" instead of "camelCase".
@@ -327,40 +324,34 @@ The `"prebuilt-layout"` model extracts only the basic elements of the document,

  Since the main benefit of `DocumentModel`-based analysis is stronger TypeScript type constraints, the following sample is written in TypeScript using ECMAScript module syntax:

- ```typescript
- import { DocumentAnalysisClient, AzureKeyCredential } from "@azure/ai-form-recognizer";
-
- // Copy the above-linked `DocumentModel` file so that it may be imported in this module.
- import { PrebuiltLayoutModel } from "./prebuilt/prebuilt-layout";
+ ```ts snippet:ReadmeSamplePrebuiltLayout
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentAnalysisClient } from "@azure/ai-form-recognizer";
+ import { createReadStream } from "node:fs";
+ import { PrebuiltLayoutModel } from "../samples-dev/prebuilt/prebuilt-layout.js";

- import fs from "fs";
-
- async function main() {
-   const endpoint = "<cognitive services endpoint>";
-   const apiKey = "<api key>";
-   const path = "<path to a document>"; // pdf/jpeg/png/tiff formats
+ const credential = new DefaultAzureCredential();
+ const client = new DocumentAnalysisClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );

-   const readStream = fs.createReadStream(path);
+ const path = "<path to a document>";
+ const readStream = createReadStream(path);

-   const client = new DocumentAnalysisClient(endpoint, new AzureKeyCredential(apiKey));
-   const poller = await client.beginAnalyzeDocument(PrebuiltLayoutModel, readStream);
-   const { pages, tables } = await poller.pollUntilDone();
+ const poller = await client.beginAnalyzeDocument(PrebuiltLayoutModel, readStream);
+ const { pages, tables } = await poller.pollUntilDone();

-   for (const page of pages || []) {
-     console.log(`- Page ${page.pageNumber}: (${page.width}x${page.height} ${page.unit})`);
-   }
+ for (const page of pages || []) {
+   console.log(`- Page ${page.pageNumber}: (${page.width}x${page.height} ${page.unit})`);
+ }

-   for (const table of tables || []) {
-     console.log(`- Table (${table.columnCount}x${table.rowCount})`);
-     for (const cell of table.cells) {
-       console.log(` cell [${cell.rowIndex},${cell.columnIndex}] "${cell.content}"`);
-     }
+ for (const table of tables || []) {
+   console.log(`- Table (${table.columnCount}x${table.rowCount})`);
+   for (const cell of table.cells) {
+     console.log(` cell [${cell.rowIndex},${cell.columnIndex}] "${cell.content}"`);
    }
  }
-
- main().catch((err) => {
-   console.error("The sample encountered an error:", err);
- });
  ```

  ### Use the "document" prebuilt
@@ -369,42 +360,36 @@ The `"prebuilt-document"` model extracts information about key-value pairs (dire

  Since the main benefit of `DocumentModel`-based analysis is stronger TypeScript type constraints, the following sample is written in TypeScript using ECMAScript module syntax:

- ```typescript
- import { DocumentAnalysisClient, AzureKeyCredential } from "@azure/ai-form-recognizer";
-
- // Copy the above-linked `DocumentModel` file so that it may be imported in this module.
- import { PrebuiltDocumentModel } from "./prebuilt/prebuilt-document";
+ ```ts snippet:ReadmeSamplePrebuiltDocument
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentAnalysisClient } from "@azure/ai-form-recognizer";
+ import { createReadStream } from "node:fs";
+ import { PrebuiltDocumentModel } from "../samples-dev/prebuilt/prebuilt-document.js";

- import fs from "fs";
-
- async function main() {
-   const endpoint = "<cognitive services endpoint>";
-   const apiKey = "<api key>";
-   const path = "<path to a document>"; // pdf/jpeg/png/tiff formats
+ const credential = new DefaultAzureCredential();
+ const client = new DocumentAnalysisClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );

-   const readStream = fs.createReadStream(path);
+ const path = "<path to a document>";
+ const readStream = createReadStream(path);

-   const client = new DocumentAnalysisClient(endpoint, new AzureKeyCredential(apiKey));
-   const poller = await client.beginAnalyzeDocument(PrebuiltDocumentModel, readStream);
+ const poller = await client.beginAnalyzeDocument(PrebuiltDocumentModel, readStream);

-   // `pages`, `tables` and `styles` are also available as in the "layout" example above, but for the sake of this
-   // example we won't show them here.
-   const { keyValuePairs } = await poller.pollUntilDone();
+ // `pages`, `tables` and `styles` are also available as in the "layout" example above, but for the sake of this
+ // example we won't show them here.
+ const { keyValuePairs } = await poller.pollUntilDone();

-   if (!keyValuePairs || keyValuePairs.length <= 0) {
-     console.log("No key-value pairs were extracted from the document.");
-   } else {
-     console.log("Key-Value Pairs:");
-     for (const { key, value, confidence } of keyValuePairs) {
-       console.log("- Key :", `"${key.content}"`);
-       console.log(" Value:", `"${value?.content ?? "<undefined>"}" (${confidence})`);
-     }
+ if (!keyValuePairs || keyValuePairs.length <= 0) {
+   console.log("No key-value pairs were extracted from the document.");
+ } else {
+   console.log("Key-Value Pairs:");
+   for (const { key, value, confidence } of keyValuePairs) {
+     console.log("- Key :", `"${key.content}"`);
+     console.log(" Value:", `"${value?.content ?? "<undefined>"}" (${confidence})`);
    }
  }
-
- main().catch((err) => {
-   console.error("The sample encountered an error:", err);
- });
  ```

  ### Use the "read" prebuilt
@@ -413,73 +398,69 @@ The `"prebuilt-read"` model extracts textual information in a document such as w

  Since the main benefit of `DocumentModel`-based analysis is stronger TypeScript type constraints, the following sample is written in TypeScript using ECMAScript module syntax:

- ```typescript
- import { DocumentAnalysisClient, AzureKeyCredential } from "@azure/ai-form-recognizer";
-
- // Copy the above-linked `DocumentModel` file so that it may be imported in this module.
- import { PrebuiltReadModel } from "./prebuilt/prebuilt-read";
-
- // See the samples directory for a definition of this helper function.
- import { getTextOfSpans } from "./utils";
+ ```ts snippet:ReadmeSamplePrebuiltRead
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentAnalysisClient } from "@azure/ai-form-recognizer";
+ import { createReadStream } from "node:fs";
+ import { PrebuiltReadModel } from "../samples-dev/prebuilt/prebuilt-read.js";

- import fs from "fs";
-
- async function main() {
-   const endpoint = "<cognitive services endpoint>";
-   const apiKey = "<api key>";
-   const path = "<path to a document>"; // pdf/jpeg/png/tiff formats
+ const credential = new DefaultAzureCredential();
+ const client = new DocumentAnalysisClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );

-   const readStream = fs.createReadStream(path);
+ const path = "<path to a document>";
+ const readStream = createReadStream(path);

-   const client = new DocumentAnalysisClient(endpoint, new AzureKeyCredential(apiKey));
-   const poller = await client.beginAnalyzeDocument(PrebuiltReadModel, readStream);
+ const poller = await client.beginAnalyzeDocument(PrebuiltReadModel, readStream);

-   // The "prebuilt-read" model (`beginReadDocument` method) only extracts information about the textual content of the
-   // document, such as page text elements, text styles, and information about the language of the text.
-   const { content, pages, languages } = await poller.pollUntilDone();
+ // The "prebuilt-read" model (`beginReadDocument` method) only extracts information about the textual content of the
+ // document, such as page text elements, text styles, and information about the language of the text.
+ const { content, pages, languages } = await poller.pollUntilDone();

-   if (!pages || pages.length <= 0) {
-     console.log("No pages were extracted from the document.");
-   } else {
-     console.log("Pages:");
-     for (const page of pages) {
-       console.log("- Page", page.pageNumber, `(unit: ${page.unit})`);
-       console.log(` ${page.width}x${page.height}, angle: ${page.angle}`);
-       console.log(
-         ` ${page.lines && page.lines.length} lines, ${page.words && page.words.length} words`,
-       );
+ if (!pages || pages.length <= 0) {
+   console.log("No pages were extracted from the document.");
+ } else {
+   console.log("Pages:");
+   for (const page of pages) {
+     console.log("- Page", page.pageNumber, `(unit: ${page.unit})`);
+     console.log(` ${page.width}x${page.height}, angle: ${page.angle}`);
+     console.log(
+       ` ${page.lines && page.lines.length} lines, ${page.words && page.words.length} words`,
+     );

-       if (page.lines && page.lines.length > 0) {
-         console.log(" Lines:");
+     if (page.lines && page.lines.length > 0) {
+       console.log(" Lines:");

-         for (const line of page.lines) {
-           console.log(` - "${line.content}"`);
-         }
+       for (const line of page.lines) {
+         console.log(` - "${line.content}"`);
        }
      }
    }
+ }

-   if (!languages || languages.length <= 0) {
-     console.log("No language spans were extracted from the document.");
-   } else {
-     console.log("Languages:");
-     for (const languageEntry of languages) {
-       console.log(
-         `- Found language: ${languageEntry.locale} (confidence: ${languageEntry.confidence})`,
-       );
-
-       for (const text of getTextOfSpans(content, languageEntry.spans)) {
-         const escapedText = text.replace(/\r?\n/g, "\\n").replace(/"/g, '\\"');
-         console.log(` - "${escapedText}"`);
-       }
+ if (!languages || languages.length <= 0) {
+   console.log("No language spans were extracted from the document.");
+ } else {
+   console.log("Languages:");
+   for (const languageEntry of languages) {
+     console.log(
+       `- Found language: ${languageEntry.locale} (confidence: ${languageEntry.confidence})`,
+     );
+
+     for (const text of getTextOfSpans(content, languageEntry.spans)) {
+       const escapedText = text.replace(/\r?\n/g, "\\n").replace(/"/g, '\\"');
+       console.log(` - "${escapedText}"`);
      }
    }
  }

- main().catch((error) => {
-   console.error("An error occurred:", error);
-   process.exit(1);
- });
+ function* getTextOfSpans(content, spans) {
+   for (const span of spans) {
+     yield content.slice(span.offset, span.offset + span.length);
+   }
+ }
  ```

  ### Classify a document
@@ -488,37 +469,32 @@ The Document Intelligence service supports custom document classifiers that can

  The following sample shows how to classify a document using a custom classifier:

- ```javascript
- const { AzureKeyCredential, DocumentAnalysisClient } = require("@azure/ai-form-recognizer");
-
- async function main() {
-   const endpoint = "<endpoint>";
-   const credential = new AzureKeyCredential("<api key>");
+ ```ts snippet:ReadmeSampleClassifyDocument
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentAnalysisClient } from "@azure/ai-form-recognizer";

-   const documentUrl =
-     "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/invoice/Invoice_1.pdf";
-
-   const client = new DocumentAnalysisClient(endpoint, credential);
+ const credential = new DefaultAzureCredential();
+ const client = new DocumentAnalysisClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );

-   const poller = await client.beginClassifyDocumentFromUrl("<classifier id>", documentUrl);
+ const documentUrl =
+   "https://raw.githubusercontent.com/Azure/azure-sdk-for-js/main/sdk/formrecognizer/ai-form-recognizer/assets/invoice/Invoice_1.pdf";

-   const result = await poller.pollUntilDone();
+ const poller = await client.beginClassifyDocumentFromUrl("<classifier id>", documentUrl);

-   if (result.documents === undefined || result.documents.length === 0) {
-     throw new Error("Failed to extract any documents.");
-   }
+ const result = await poller.pollUntilDone();

-   for (const document of result.documents) {
-     console.log(
-       `Extracted a document with type '${document.docType}' on page ${document.boundingRegions?.[0].pageNumber} (confidence: ${document.confidence})`,
-     );
-   }
+ if (result?.documents?.length === 0) {
+   throw new Error("Failed to extract any documents.");
  }

- main().catch((error) => {
-   console.error("An error occurred:", error);
-   process.exit(1);
- });
+ for (const document of result.documents) {
+   console.log(
+     `Extracted a document with type '${document.docType}' on page ${document.boundingRegions?.[0].pageNumber} (confidence: ${document.confidence})`,
+   );
+ }
  ```

  For information on training a custom classifier, see the [section on classifier training at the end of the next section](#build-classifier).
@@ -531,60 +507,54 @@ While we provide these methods for programmatic model creation, the Document Int

  For example, the following program builds a custom document model using a SAS-encoded URL to a pre-existing Azure Storage container:

- ```javascript
- const {
-   DocumentModelAdministrationClient,
-   AzureKeyCredential,
- } = require("@azure/ai-form-recognizer");
-
- async function main() {
-   const endpoint = "<cognitive services endpoint>";
-   const apiKey = "<api key>";
-   const containerSasUrl = "<SAS url to the blob container storing training documents>";
-
-   const client = new DocumentModelAdministrationClient(endpoint, new AzureKeyCredential(apiKey));
-
-   // You must provide the model ID. It can be any text that does not start with "prebuilt-".
-   // For example, you could provide a randomly generated GUID using the "uuid" package.
-   // The second parameter is the SAS-encoded URL to an Azure Storage container with the training documents.
-   // The third parameter is the build mode: one of "template" (the only mode prior to 4.0.0-beta.3) or "neural".
-   // See https://aka.ms/azsdk/formrecognizer/buildmode for more information about build modes.
-   const poller = await client.beginBuildDocumentModel("<model ID>", containerSasUrl, "template", {
-     // The model description is optional and can be any text.
-     description: "This is my new model!",
-     onProgress: ({ status }) => {
-       console.log(`operation status: ${status}`);
-     },
-   });
-   const model = await poller.pollUntilDone();
-
-   console.log("Model ID:", model.modelId);
-   console.log("Description:", model.description);
-   console.log("Created:", model.createdOn);
-
-   // A model may contain several document types, which describe the possible object structures of fields extracted using
-   // this model
-
-   console.log("Document Types:");
-   for (const [docType, { description, fieldSchema: schema }] of Object.entries(
-     model.docTypes ?? {},
-   )) {
-     console.log(`- Name: "${docType}"`);
-     console.log(` Description: "${description}"`);
-
-     // For simplicity, this example will only show top-level field names
-     console.log(" Fields:");
-
-     for (const [fieldName, fieldSchema] of Object.entries(schema)) {
-       console.log(` - "${fieldName}" (${fieldSchema.type})`);
-       console.log(` ${fieldSchema.description ?? "<no description>"}`);
-     }
-   }
- }
+ ```ts snippet:ReadmeSampleBuildModel
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentModelAdministrationClient } from "@azure/ai-form-recognizer";
+
+ const credential = new DefaultAzureCredential();
+ const client = new DocumentModelAdministrationClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );

- main().catch((err) => {
-   console.error("The sample encountered an error:", err);
+ const containerSasUrl = "<SAS url to the blob container storing training documents>";
+
+ // You must provide the model ID. It can be any text that does not start with "prebuilt-".
+ // For example, you could provide a randomly generated GUID using the "uuid" package.
+ // The second parameter is the SAS-encoded URL to an Azure Storage container with the training documents.
+ // The third parameter is the build mode: one of "template" (the only mode prior to 4.0.0-beta.3) or "neural".
+ // See https://aka.ms/azsdk/formrecognizer/buildmode for more information about build modes.
+ const poller = await client.beginBuildDocumentModel("<model ID>", containerSasUrl, "template", {
+   // The model description is optional and can be any text.
+   description: "This is my new model!",
+   onProgress: ({ status }) => {
+     console.log(`operation status: ${status}`);
+   },
  });
+ const model = await poller.pollUntilDone();
+
+ console.log(`Model ID: ${model.modelId}`);
+ console.log(`Description: ${model.description}`);
+ console.log(`Created: ${model.createdOn}`);
+
+ // A model may contain several document types, which describe the possible object structures of fields extracted using
+ // this model
+
+ console.log("Document Types:");
+ for (const [docType, { description, fieldSchema: schema }] of Object.entries(
+   model.docTypes ?? {},
+ )) {
+   console.log(`- Name: "${docType}"`);
+   console.log(` Description: "${description}"`);
+
+   // For simplicity, this example will only show top-level field names
+   console.log(" Fields:");
+
+   for (const [fieldName, fieldSchema] of Object.entries(schema)) {
+     console.log(` - "${fieldName}" (${fieldSchema.type})`);
+     console.log(` ${fieldSchema.description ?? "<no description>"}`);
+   }
+ }
  ```

  <a id="build-classifier"></a>
@@ -594,50 +564,44 @@ Custom classifiers are built in a similar way using the `beginBuildDocumentClass

  `DocumentModelAdministrationClient` also provides several methods for accessing and listing models. The following example shows how to iterate through the models in a resource (this will include both custom models in the resource as well as prebuilt models that are common to all resources), get a model by ID, and delete a model.

- ```javascript
- const {
-   DocumentModelAdministrationClient,
-   AzureKeyCredential,
- } = require("@azure/ai-form-recognizer");
-
- async function main() {
-   const endpoint = "<cognitive services endpoint>";
-   const apiKey = "<api key>";
-   const client = new DocumentModelAdministrationClient(endpoint, new AzureKeyCredential(apiKey));
-
-   // Produces an async iterable that supports paging (`PagedAsyncIterableIterator`). The `listDocumentModels` method will only
-   // iterate over model summaries, which do not include detailed schema information. Schema information is only returned
-   // from `getDocumentModel` as part of the full model information.
-   const models = client.listDocumentModels();
-   let i = 1;
-   for await (const summary of models) {
-     console.log(`Model ${i++}:`, summary);
-   }
+ ```ts snippet:ReadmeSampleManageModels
+ import { DefaultAzureCredential } from "@azure/identity";
+ import { DocumentModelAdministrationClient } from "@azure/ai-form-recognizer";

-   // The iterable is paged, and the application can control the flow of paging if needed
-   i = 1;
-   for await (const page of client.listDocumentModels().byPage()) {
-     for (const summary of page) {
-       console.log(`Model ${i++}`, summary);
-     }
-   }
+ const credential = new DefaultAzureCredential();
+ const client = new DocumentModelAdministrationClient(
+   "https://<resource name>.cognitiveservices.azure.com",
+   credential,
+ );

-   // We can also get a full ModelInfo by ID. Here we only show the basic information. See the documentation and the
-   // `getDocumentModel` sample program for information about the `docTypes` field, which contains the model's document type
-   // schemas.
-   const model = await client.getDocumentModel("<model ID>");
-   console.log("ID", model.modelId);
-   console.log("Created:", model.createdOn);
-   console.log("Description: ", model.description ?? "<none>");
-
-   // A model can also be deleted by its model ID. Once it is deleted, it CANNOT be recovered.
-   const modelIdToDelete = "<model ID that should be deleted forever>";
-   await client.deleteDocumentModel(modelIdToDelete);
+ // Produces an async iterable that supports paging (`PagedAsyncIterableIterator`). The `listDocumentModels` method will only
+ // iterate over model summaries, which do not include detailed schema information. Schema information is only returned
+ // from `getDocumentModel` as part of the full model information.
+ const models = client.listDocumentModels();
+ let i = 1;
+ for await (const summary of models) {
+   console.log(`Model ${i++}:`, summary);
  }

- main().catch((err) => {
-   console.error("The sample encountered an error:", err);
- });
+ // The iterable is paged, and the application can control the flow of paging if needed
+ i = 1;
+ for await (const page of client.listDocumentModels().byPage()) {
+   for (const summary of page) {
+     console.log(`Model ${i++}`, summary);
+   }
+ }
+
+ // We can also get a full ModelInfo by ID. Here we only show the basic information. See the documentation and the
+ // `getDocumentModel` sample program for information about the `docTypes` field, which contains the model's document type
+ // schemas.
+ const model = await client.getDocumentModel("<model ID>");
+ console.log(`ID ${model.modelId}`);
+ console.log(`Created: ${model.createdOn}`);
+ console.log(`Description: ${model.description ?? "<none>"}`);
+
+ // A model can also be deleted by its model ID. Once it is deleted, it CANNOT be recovered.
+ const modelIdToDelete = "<model ID that should be deleted forever>";
+ await client.deleteDocumentModel(modelIdToDelete);
  ```

  Similar methods `listDocumentClassifiers` and `getDocumentClassifier` are available for listing and getting information about custom classifiers in addition to `deleteDocumentClassifier` for deleting custom classifiers.
@@ -650,8 +614,8 @@ For assistance with troubleshooting, see the [troubleshooting guide][trouble-sho

  Enabling logging may help uncover useful information about failures. In order to see a log of HTTP requests and responses, set the `AZURE_LOG_LEVEL` environment variable to `info`. Alternatively, logging can be enabled at runtime by calling `setLogLevel` in the `@azure/logger`:

- ```javascript
- const { setLogLevel } = require("@azure/logger");
+ ```ts snippet:SetLogLevel
+ import { setLogLevel } from "@azure/logger";

  setLogLevel("info");
  ```
@@ -666,8 +630,6 @@ Please take a look at the [samples](https://github.com/Azure/azure-sdk-for-js/tr

  If you'd like to contribute to this library, please read the [contributing guide](https://github.com/Azure/azure-sdk-for-js/blob/main/CONTRIBUTING.md) to learn more about how to build and test the code.

-
-
  [azure_cli]: https://learn.microsoft.com/cli/azure
  [azure_sub]: https://azure.microsoft.com/free/
  [fr_or_cs_resource]: https://learn.microsoft.com/azure/cognitive-services/cognitive-services-apis-create-account?tabs=multiservice%2Cwindows