datalab-python-sdk 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/PKG-INFO +1 -1
  2. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/datalab_sdk/__init__.py +11 -1
  3. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/datalab_sdk/cli.py +308 -28
  4. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/datalab_sdk/client.py +480 -31
  5. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/datalab_sdk/models.py +76 -7
  6. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/datalab_sdk/settings.py +1 -1
  7. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/pyproject.toml +1 -1
  8. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/tests/test_client_methods.py +315 -131
  9. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/uv.lock +1 -1
  10. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/.github/workflows/ci.yml +0 -0
  11. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/.github/workflows/publish.yml +0 -0
  12. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/.gitignore +0 -0
  13. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/.pre-commit-config.yaml +0 -0
  14. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/.python-version +0 -0
  15. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/LICENSE +0 -0
  16. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/README.md +0 -0
  17. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/08-Lambda-Calculus.pptx +0 -0
  18. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/adversarial.pdf +0 -0
  19. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/bid_evaluation.docx +0 -0
  20. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/book_review.ppt +0 -0
  21. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/book_store.xls +0 -0
  22. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/chi_hind.png +0 -0
  23. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/how_to_read.doc +0 -0
  24. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/normandy.epub +0 -0
  25. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/sample-1-sheet.xlsx +0 -0
  26. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/thinkpython.pdf +0 -0
  27. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/data/vibe.html +0 -0
  28. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/datalab_sdk/exceptions.py +0 -0
  29. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/datalab_sdk/mimetypes.py +0 -0
  30. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/integration/README.md +0 -0
  31. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/integration/__init__.py +0 -0
  32. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/integration/test_live_api.py +0 -0
  33. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/integration/test_readme_examples.py +0 -0
  34. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/poetry.lock +0 -0
  35. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/pytest.ini +0 -0
  36. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/README.md +0 -0
  37. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/end_to_end_workflow.py +0 -0
  38. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_api_tutorial/1_get_step_types.py +0 -0
  39. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_api_tutorial/2_get_workflows.py +0 -0
  40. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_api_tutorial/3_create_workflow.py +0 -0
  41. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_api_tutorial/4_execute_workflow.py +0 -0
  42. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_api_tutorial/5_poll_workflow.py +0 -0
  43. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_api_tutorial/README.md +0 -0
  44. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_definitions/README.md +0 -0
  45. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_definitions/eval_segmentation.json +0 -0
  46. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_definitions/parse_segment.json +0 -0
  47. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_definitions/segment_parallel_extract.json +0 -0
  48. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/recipes/workflows/workflow_definitions/slack_alert.json +0 -0
  49. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/tests/__init__.py +0 -0
  50. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/tests/conftest.py +0 -0
  51. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/tests/test_cli_simple.py +0 -0
  52. {datalab_python_sdk-0.2.2 → datalab_python_sdk-0.3.0}/tests/test_workflows.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datalab-python-sdk
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: SDK for the Datalab document intelligence API
5
5
  Author-email: Datalab Team <hi@datalab.to>
6
6
  License-Expression: MIT
@@ -2,15 +2,20 @@
2
2
  Datalab SDK - Python client for Datalab API
3
3
 
4
4
  This SDK provides both synchronous and asynchronous interfaces to the Datalab API,
5
- supporting document conversion, OCR, layout analysis, and table recognition.
5
+ supporting document conversion, extraction, segmentation, and more.
6
6
  """
7
7
 
8
8
  from .client import DatalabClient, AsyncDatalabClient
9
9
  from .exceptions import DatalabError, DatalabAPIError, DatalabTimeoutError
10
10
  from .models import (
11
11
  ConversionResult,
12
+ CreateDocumentResult,
12
13
  OCRResult,
13
14
  ConvertOptions,
15
+ ExtractOptions,
16
+ SegmentOptions,
17
+ CustomPipelineOptions,
18
+ TrackChangesOptions,
14
19
  OCROptions,
15
20
  FormFillingOptions,
16
21
  FormFillingResult,
@@ -30,8 +35,13 @@ __all__ = [
30
35
  "DatalabAPIError",
31
36
  "DatalabTimeoutError",
32
37
  "ConversionResult",
38
+ "CreateDocumentResult",
33
39
  "OCRResult",
34
40
  "ConvertOptions",
41
+ "ExtractOptions",
42
+ "SegmentOptions",
43
+ "CustomPipelineOptions",
44
+ "TrackChangesOptions",
35
45
  "OCROptions",
36
46
  "FormFillingOptions",
37
47
  "FormFillingResult",
@@ -16,6 +16,10 @@ from datalab_sdk.mimetypes import SUPPORTED_EXTENSIONS
16
16
  from datalab_sdk.models import (
17
17
  OCROptions,
18
18
  ConvertOptions,
19
+ ExtractOptions,
20
+ SegmentOptions,
21
+ CustomPipelineOptions,
22
+ TrackChangesOptions,
19
23
  ProcessingOptions,
20
24
  WorkflowStep,
21
25
  InputConfig,
@@ -60,7 +64,7 @@ def common_options(func):
60
64
 
61
65
 
62
66
  def marker_options(func):
63
- """Options specific to marker/convert command"""
67
+ """Options specific to convert command"""
64
68
  func = click.option(
65
69
  "--format",
66
70
  "output_format",
@@ -79,17 +83,14 @@ def marker_options(func):
79
83
  is_flag=True,
80
84
  help="Disable synthetic image captions/descriptions in output",
81
85
  )(func)
82
- func = click.option(
83
- "--page_schema", help="Schema to set to do structured extraction"
84
- )(func)
85
86
  func = click.option(
86
87
  "--add_block_ids", is_flag=True, help="Add block IDs to HTML output"
87
88
  )(func)
88
89
  func = click.option(
89
90
  "--mode",
90
91
  type=click.Choice(["fast", "balanced", "accurate"]),
91
- default="balanced",
92
- help="OCR mode",
92
+ default="fast",
93
+ help="Processing mode",
93
94
  )(func)
94
95
  return func
95
96
 
@@ -125,22 +126,27 @@ async def process_files_async(
125
126
 
126
127
  async def call_api(client, file_path, output_path):
127
128
  """Make API call - client handles retries for rate limits"""
128
- if method == "convert":
129
- return await client.convert(
130
- file_path,
131
- options=options,
132
- save_output=output_path,
133
- max_polls=max_polls,
134
- poll_interval=poll_interval,
135
- )
136
- else: # method == 'ocr'
137
- return await client.ocr(
138
- file_path,
129
+ api_method = getattr(client, method)
130
+ # For extract/segment with checkpoint_id, don't pass file_path
131
+ has_checkpoint = (
132
+ options is not None
133
+ and hasattr(options, "checkpoint_id")
134
+ and options.checkpoint_id is not None
135
+ )
136
+ if has_checkpoint:
137
+ return await api_method(
139
138
  options=options,
140
139
  save_output=output_path,
141
140
  max_polls=max_polls,
142
141
  poll_interval=poll_interval,
143
142
  )
143
+ return await api_method(
144
+ file_path,
145
+ options=options,
146
+ save_output=output_path,
147
+ max_polls=max_polls,
148
+ poll_interval=poll_interval,
149
+ )
144
150
 
145
151
  async def process_single_file(file_path: Path) -> dict:
146
152
  async with semaphore:
@@ -260,9 +266,18 @@ def process_documents(
260
266
  paginate: bool = False,
261
267
  disable_image_extraction: bool = False,
262
268
  disable_image_captions: bool = False,
263
- page_schema: Optional[str] = None,
264
269
  add_block_ids: bool = False,
265
- mode: str = "balanced",
270
+ mode: str = "fast",
271
+ # Extract-specific
272
+ page_schema: Optional[str] = None,
273
+ checkpoint_id: Optional[str] = None,
274
+ # Segment-specific
275
+ segmentation_schema: Optional[str] = None,
276
+ # Custom pipeline-specific
277
+ pipeline_id: Optional[str] = None,
278
+ run_eval: bool = False,
279
+ # Options object override
280
+ options_override: Optional[ProcessingOptions] = None,
266
281
  ):
267
282
  """Unified document processing function"""
268
283
  try:
@@ -278,7 +293,7 @@ def process_documents(
278
293
  if base_url is None:
279
294
  base_url = settings.DATALAB_HOST
280
295
 
281
- output_dir = setup_output_directory(output_dir)
296
+ output_dir_path = setup_output_directory(output_dir)
282
297
  file_extensions = parse_extensions(extensions)
283
298
 
284
299
  # Get files to process
@@ -292,19 +307,57 @@ def process_documents(
292
307
  click.echo(f"Found {len(to_process)} files to process")
293
308
 
294
309
  # Create processing options based on method
295
- if method == "convert":
310
+ if options_override:
311
+ options = options_override
312
+ elif method == "convert":
296
313
  options = ConvertOptions(
297
- output_format=output_format,
314
+ output_format=output_format or "markdown",
298
315
  max_pages=max_pages,
299
316
  paginate=paginate,
300
317
  disable_image_extraction=disable_image_extraction,
301
318
  disable_image_captions=disable_image_captions,
302
319
  page_range=page_range,
303
320
  skip_cache=skip_cache,
304
- page_schema=page_schema,
305
321
  add_block_ids=add_block_ids,
306
322
  mode=mode,
307
323
  )
324
+ elif method == "extract":
325
+ options = ExtractOptions(
326
+ page_schema=page_schema or "",
327
+ checkpoint_id=checkpoint_id,
328
+ mode=mode,
329
+ output_format=output_format or "markdown",
330
+ max_pages=max_pages,
331
+ page_range=page_range,
332
+ skip_cache=skip_cache,
333
+ )
334
+ elif method == "segment":
335
+ options = SegmentOptions(
336
+ segmentation_schema=segmentation_schema or "",
337
+ checkpoint_id=checkpoint_id,
338
+ mode=mode,
339
+ max_pages=max_pages,
340
+ page_range=page_range,
341
+ skip_cache=skip_cache,
342
+ )
343
+ elif method == "run_custom_pipeline":
344
+ options = CustomPipelineOptions(
345
+ pipeline_id=pipeline_id or "",
346
+ run_eval=run_eval,
347
+ mode=mode,
348
+ output_format=output_format or "markdown",
349
+ max_pages=max_pages,
350
+ page_range=page_range,
351
+ skip_cache=skip_cache,
352
+ )
353
+ elif method == "track_changes":
354
+ options = TrackChangesOptions(
355
+ output_format=output_format or "markdown,html,chunks",
356
+ paginate=paginate,
357
+ max_pages=max_pages,
358
+ page_range=page_range,
359
+ skip_cache=skip_cache,
360
+ )
308
361
  else: # method == "ocr"
309
362
  options = OCROptions(
310
363
  max_pages=max_pages,
@@ -315,7 +368,7 @@ def process_documents(
315
368
  results = asyncio.run(
316
369
  process_files_async(
317
370
  to_process,
318
- output_dir,
371
+ output_dir_path,
319
372
  method,
320
373
  options=options,
321
374
  max_concurrent=max_concurrent,
@@ -327,8 +380,16 @@ def process_documents(
327
380
  )
328
381
 
329
382
  # Show results
330
- operation = "Conversion" if method == "convert" else "OCR"
331
- show_results(results, operation, output_dir)
383
+ operation_names = {
384
+ "convert": "Conversion",
385
+ "extract": "Extraction",
386
+ "segment": "Segmentation",
387
+ "run_custom_pipeline": "Custom Pipeline",
388
+ "track_changes": "Track Changes",
389
+ "ocr": "OCR",
390
+ }
391
+ operation = operation_names.get(method, method.title())
392
+ show_results(results, operation, output_dir_path)
332
393
 
333
394
  except DatalabError as e:
334
395
  click.echo(f"Error: {e}", err=True)
@@ -361,7 +422,6 @@ def convert(
361
422
  paginate: bool,
362
423
  disable_image_extraction: bool,
363
424
  disable_image_captions: bool,
364
- page_schema: Optional[str],
365
425
  add_block_ids: bool,
366
426
  mode: str,
367
427
  ):
@@ -383,12 +443,227 @@ def convert(
383
443
  paginate=paginate,
384
444
  disable_image_extraction=disable_image_extraction,
385
445
  disable_image_captions=disable_image_captions,
386
- page_schema=page_schema,
387
446
  add_block_ids=add_block_ids,
388
447
  mode=mode,
389
448
  )
390
449
 
391
450
 
451
+ @click.command()
452
+ @click.argument("path", type=click.Path(exists=True))
453
+ @click.option("--page_schema", required=True, help="JSON schema for structured extraction (must contain 'properties' key)")
454
+ @click.option("--checkpoint_id", help="Checkpoint ID from a previous convert (skips re-parsing)")
455
+ @click.option("--format", "output_format", default="markdown", type=click.Choice(["markdown", "html", "json", "chunks"]), help="Output format")
456
+ @click.option("--mode", type=click.Choice(["fast", "balanced", "accurate"]), default="fast", help="Processing mode")
457
+ @common_options
458
+ def extract(
459
+ path: str,
460
+ page_schema: str,
461
+ checkpoint_id: Optional[str],
462
+ output_format: str,
463
+ mode: str,
464
+ api_key: str,
465
+ output_dir: str,
466
+ max_pages: Optional[int],
467
+ extensions: Optional[str],
468
+ max_concurrent: int,
469
+ base_url: str,
470
+ page_range: Optional[str],
471
+ skip_cache: bool,
472
+ max_polls: int,
473
+ poll_interval: int,
474
+ ):
475
+ """Extract structured data from documents using a JSON schema"""
476
+ process_documents(
477
+ path=path,
478
+ method="extract",
479
+ api_key=api_key,
480
+ output_dir=output_dir,
481
+ max_pages=max_pages,
482
+ extensions=extensions,
483
+ max_concurrent=max_concurrent,
484
+ base_url=base_url,
485
+ page_range=page_range,
486
+ skip_cache=skip_cache,
487
+ max_polls=max_polls,
488
+ poll_interval=poll_interval,
489
+ output_format=output_format,
490
+ mode=mode,
491
+ page_schema=page_schema,
492
+ checkpoint_id=checkpoint_id,
493
+ )
494
+
495
+
496
+ @click.command()
497
+ @click.argument("path", type=click.Path(exists=True))
498
+ @click.option("--segmentation_schema", required=True, help="JSON schema with segment names and descriptions")
499
+ @click.option("--checkpoint_id", help="Checkpoint ID from a previous convert (skips re-parsing)")
500
+ @click.option("--mode", type=click.Choice(["fast", "balanced", "accurate"]), default="fast", help="Processing mode")
501
+ @common_options
502
+ def segment(
503
+ path: str,
504
+ segmentation_schema: str,
505
+ checkpoint_id: Optional[str],
506
+ mode: str,
507
+ api_key: str,
508
+ output_dir: str,
509
+ max_pages: Optional[int],
510
+ extensions: Optional[str],
511
+ max_concurrent: int,
512
+ base_url: str,
513
+ page_range: Optional[str],
514
+ skip_cache: bool,
515
+ max_polls: int,
516
+ poll_interval: int,
517
+ ):
518
+ """Segment documents into sections using a schema"""
519
+ process_documents(
520
+ path=path,
521
+ method="segment",
522
+ api_key=api_key,
523
+ output_dir=output_dir,
524
+ max_pages=max_pages,
525
+ extensions=extensions,
526
+ max_concurrent=max_concurrent,
527
+ base_url=base_url,
528
+ page_range=page_range,
529
+ skip_cache=skip_cache,
530
+ max_polls=max_polls,
531
+ poll_interval=poll_interval,
532
+ mode=mode,
533
+ segmentation_schema=segmentation_schema,
534
+ checkpoint_id=checkpoint_id,
535
+ )
536
+
537
+
538
+ @click.command("custom-pipeline")
539
+ @click.argument("path", type=click.Path(exists=True))
540
+ @click.option("--pipeline_id", required=True, help="Custom pipeline ID to execute (cp_XXXXX format)")
541
+ @click.option("--run_eval", is_flag=True, help="Run evaluation rules for this pipeline")
542
+ @click.option("--format", "output_format", default="markdown", type=click.Choice(["markdown", "html", "json", "chunks"]), help="Output format")
543
+ @click.option("--mode", type=click.Choice(["fast", "balanced", "accurate"]), default="fast", help="Processing mode")
544
+ @common_options
545
+ def custom_pipeline(
546
+ path: str,
547
+ pipeline_id: str,
548
+ run_eval: bool,
549
+ output_format: str,
550
+ mode: str,
551
+ api_key: str,
552
+ output_dir: str,
553
+ max_pages: Optional[int],
554
+ extensions: Optional[str],
555
+ max_concurrent: int,
556
+ base_url: str,
557
+ page_range: Optional[str],
558
+ skip_cache: bool,
559
+ max_polls: int,
560
+ poll_interval: int,
561
+ ):
562
+ """Run a custom pipeline on documents"""
563
+ process_documents(
564
+ path=path,
565
+ method="run_custom_pipeline",
566
+ api_key=api_key,
567
+ output_dir=output_dir,
568
+ max_pages=max_pages,
569
+ extensions=extensions,
570
+ max_concurrent=max_concurrent,
571
+ base_url=base_url,
572
+ page_range=page_range,
573
+ skip_cache=skip_cache,
574
+ max_polls=max_polls,
575
+ poll_interval=poll_interval,
576
+ output_format=output_format,
577
+ mode=mode,
578
+ pipeline_id=pipeline_id,
579
+ run_eval=run_eval,
580
+ )
581
+
582
+
583
+ @click.command("track-changes")
584
+ @click.argument("path", type=click.Path(exists=True))
585
+ @click.option("--format", "output_format", default="markdown,html,chunks", help="Comma-separated output formats (markdown, html, chunks)")
586
+ @click.option("--paginate", is_flag=True, help="Separate output by page")
587
+ @common_options
588
+ def track_changes(
589
+ path: str,
590
+ output_format: str,
591
+ paginate: bool,
592
+ api_key: str,
593
+ output_dir: str,
594
+ max_pages: Optional[int],
595
+ extensions: Optional[str],
596
+ max_concurrent: int,
597
+ base_url: str,
598
+ page_range: Optional[str],
599
+ skip_cache: bool,
600
+ max_polls: int,
601
+ poll_interval: int,
602
+ ):
603
+ """Extract tracked changes from DOCX documents"""
604
+ process_documents(
605
+ path=path,
606
+ method="track_changes",
607
+ api_key=api_key,
608
+ output_dir=output_dir,
609
+ max_pages=max_pages,
610
+ extensions=extensions,
611
+ max_concurrent=max_concurrent,
612
+ base_url=base_url,
613
+ page_range=page_range,
614
+ skip_cache=skip_cache,
615
+ max_polls=max_polls,
616
+ poll_interval=poll_interval,
617
+ output_format=output_format,
618
+ paginate=paginate,
619
+ )
620
+
621
+
622
+ @click.command("create-document")
623
+ @click.option("--markdown", "markdown_input", required=True, help="Markdown content or path to markdown file")
624
+ @click.option("--output", "-o", "output_path", required=True, type=click.Path(), help="Output file path for the DOCX")
625
+ @click.option("--api_key", required=False, help="Datalab API key")
626
+ @click.option("--base_url", default=settings.DATALAB_HOST, help="API base URL")
627
+ def create_document(
628
+ markdown_input: str,
629
+ output_path: str,
630
+ api_key: Optional[str],
631
+ base_url: str,
632
+ ):
633
+ """Create a DOCX document from markdown"""
634
+ try:
635
+ if api_key is None:
636
+ api_key = settings.DATALAB_API_KEY
637
+
638
+ if api_key is None:
639
+ raise DatalabError(
640
+ "You must either pass in an api key via --api_key or set the DATALAB_API_KEY env variable."
641
+ )
642
+
643
+ # Check if markdown_input is a file path
644
+ md_path = Path(markdown_input)
645
+ if md_path.exists() and md_path.is_file():
646
+ markdown_content = md_path.read_text(encoding="utf-8")
647
+ else:
648
+ markdown_content = markdown_input
649
+
650
+ client = DatalabClient(api_key=api_key, base_url=base_url)
651
+ result = client.create_document(
652
+ markdown=markdown_content,
653
+ save_output=output_path,
654
+ )
655
+
656
+ if result.success:
657
+ click.echo(f"Document created successfully: {Path(output_path).with_suffix('.docx')}")
658
+ else:
659
+ click.echo(f"Document creation failed: {result.error}", err=True)
660
+ sys.exit(1)
661
+
662
+ except DatalabError as e:
663
+ click.echo(f"Error: {e}", err=True)
664
+ sys.exit(1)
665
+
666
+
392
667
  # Workflow commands
393
668
  @click.command()
394
669
  @click.option("--name", required=True, help="Name of the workflow")
@@ -869,6 +1144,11 @@ def _render_dag_simple(layers, children, step_map):
869
1144
 
870
1145
  # Add commands to CLI group
871
1146
  cli.add_command(convert)
1147
+ cli.add_command(extract)
1148
+ cli.add_command(segment)
1149
+ cli.add_command(custom_pipeline)
1150
+ cli.add_command(track_changes)
1151
+ cli.add_command(create_document)
872
1152
  cli.add_command(create_workflow)
873
1153
  cli.add_command(get_workflow)
874
1154
  cli.add_command(get_step_types)