doc-intelligence 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. doc_intelligence-0.0.7/.github/workflows/deploy-docs.yml +40 -0
  2. doc_intelligence-0.0.7/.github/workflows/release.yml +29 -0
  3. doc_intelligence-0.0.7/.gitignore +13 -0
  4. doc_intelligence-0.0.7/.pre-commit-config.yaml +17 -0
  5. doc_intelligence-0.0.7/.python-version +1 -0
  6. doc_intelligence-0.0.7/.vscode/settings.json +15 -0
  7. doc_intelligence-0.0.7/PKG-INFO +151 -0
  8. doc_intelligence-0.0.7/README.md +133 -0
  9. doc_intelligence-0.0.7/docs/index.md +5 -0
  10. doc_intelligence-0.0.7/docs/quickstart.md +125 -0
  11. doc_intelligence-0.0.7/document_ai/__init__.py +1 -0
  12. doc_intelligence-0.0.7/document_ai/base.py +64 -0
  13. doc_intelligence-0.0.7/document_ai/config.py +7 -0
  14. doc_intelligence-0.0.7/document_ai/extractor.py +76 -0
  15. doc_intelligence-0.0.7/document_ai/formatter.py +62 -0
  16. doc_intelligence-0.0.7/document_ai/llm.py +49 -0
  17. doc_intelligence-0.0.7/document_ai/parser.py +36 -0
  18. doc_intelligence-0.0.7/document_ai/processer.py +109 -0
  19. doc_intelligence-0.0.7/document_ai/pydantic_to_json_instance_schema.py +408 -0
  20. doc_intelligence-0.0.7/document_ai/schemas/__init__.py +0 -0
  21. doc_intelligence-0.0.7/document_ai/schemas/core.py +42 -0
  22. doc_intelligence-0.0.7/document_ai/schemas/pdf.py +31 -0
  23. doc_intelligence-0.0.7/document_ai/types/pdf.py +6 -0
  24. doc_intelligence-0.0.7/document_ai/utils.py +292 -0
  25. doc_intelligence-0.0.7/main.py +6 -0
  26. doc_intelligence-0.0.7/mkdocs.yml +50 -0
  27. doc_intelligence-0.0.7/notebooks/architecture.drawio +145 -0
  28. doc_intelligence-0.0.7/notebooks/general.ipynb +532 -0
  29. doc_intelligence-0.0.7/notebooks/lab.ipynb +255 -0
  30. doc_intelligence-0.0.7/notebooks/pdf/digital_pdf.ipynb +257 -0
  31. doc_intelligence-0.0.7/pyproject.toml +44 -0
  32. doc_intelligence-0.0.7/tests/__init__.py +0 -0
  33. doc_intelligence-0.0.7/tests/test_pydantic_to_json_instance_schema.py +483 -0
  34. doc_intelligence-0.0.7/uv.lock +2630 -0
@@ -0,0 +1,40 @@
1
+ name: Deploy MkDocs to GitHub Pages
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ workflow_dispatch:
8
+
9
+ permissions:
10
+ contents: write
11
+
12
+ jobs:
13
+ deploy:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - name: Checkout repository
17
+ uses: actions/checkout@v4
18
+ with:
19
+ fetch-depth: 0
20
+
21
+ - name: Setup Python
22
+ uses: actions/setup-python@v5
23
+ with:
24
+ python-version: '3.10'
25
+
26
+ - name: Install uv
27
+ uses: astral-sh/setup-uv@v5
28
+ with:
29
+ enable-cache: true
30
+
31
+ - name: Install dependencies
32
+ run: uv sync
33
+
34
+ - name: Configure Git
35
+ run: |
36
+ git config user.name github-actions
37
+ git config user.email github-actions@github.com
38
+
39
+ - name: Deploy to GitHub Pages
40
+ run: uv run mkdocs gh-deploy --force
@@ -0,0 +1,29 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ jobs:
9
+ release:
10
+ name: Publish to PyPI
11
+ runs-on: ubuntu-latest
12
+ environment: pypi
13
+ permissions:
14
+ id-token: write
15
+ steps:
16
+ - name: Checkout repository
17
+ uses: actions/checkout@v6
18
+
19
+ - name: Install uv and setup the python version
20
+ uses: astral-sh/setup-uv@v7
21
+
22
+ - name: Install Python 3.10
23
+ run: uv python install 3.10
24
+
25
+ - name: Build
26
+ run: uv build
27
+
28
+ - name: Publish package
29
+ run: uv publish
@@ -0,0 +1,13 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ .env
12
+
13
+ data/
@@ -0,0 +1,17 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ # Ruff version.
4
+ rev: v0.14.13
5
+ hooks:
6
+ # Run the linter.
7
+ - id: ruff-check
8
+ args: [ --fix ]
9
+ # Run the formatter.
10
+ - id: ruff-format
11
+ # - repo: https://github.com/facebook/pyrefly-pre-commit
12
+ # rev: 0.48.1 # Note: this is the version of the pre-commit hook, NOT the pyrefly version used for type checking
13
+ # hooks:
14
+ # - id: pyrefly-check
15
+ # name: Pyrefly (type checking)
16
+ # pass_filenames: false # Recommended to do full repo checks. However, you can change this to `true` to only check changed files
17
+ # language: system # Use system-installed pyrefly
@@ -0,0 +1 @@
1
+ 3.10
@@ -0,0 +1,15 @@
1
+ {
2
+ "[python]": {
3
+ "editor.formatOnSave": true,
4
+ "editor.codeActionsOnSave": {
5
+ "source.fixAll": "explicit",
6
+ "source.organizeImports": "explicit"
7
+ },
8
+ "editor.defaultFormatter": "charliermarsh.ruff"
9
+ },
10
+ "ruff.enable": true,
11
+ "ruff.organizeImports": true,
12
+ "ruff.fixAll": true
13
+ }
14
+
15
+
@@ -0,0 +1,151 @@
1
+ Metadata-Version: 2.4
2
+ Name: doc-intelligence
3
+ Version: 0.0.7
4
+ Summary: Document AI - Intelligent document processing and extraction
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: google-genai>=1.57.0
7
+ Requires-Dist: langchain>=1.2.9
8
+ Requires-Dist: loguru>=0.7.3
9
+ Requires-Dist: mkdocs-material>=9.7.1
10
+ Requires-Dist: mkdocs>=1.6.1
11
+ Requires-Dist: openai>=2.15.0
12
+ Requires-Dist: pdfplumber>=0.11.9
13
+ Requires-Dist: pre-commit>=4.5.1
14
+ Requires-Dist: pymupdf>=1.26.7
15
+ Requires-Dist: pytest>=9.0.2
16
+ Requires-Dist: python-dotenv>=1.2.1
17
+ Description-Content-Type: text/markdown
18
+
19
+ # Document AI
20
+
21
+ **Documentation:** [https://zeel-04.github.io/document-ai/](https://zeel-04.github.io/document-ai/)
22
+
23
+ A library for parsing, formatting, and processing documents that can be used to build AI-powered document processing pipelines with structured data extraction and citation support.
24
+
25
+ ## Features
26
+
27
+ - Extract structured data from PDF documents using LLMs
28
+ - Automatic citation tracking with page numbers, line numbers, and bounding boxes
29
+ - Support for digital PDFs
30
+ - Type-safe data models using Pydantic
31
+ - OpenAI integration with support for reasoning models
32
+
33
+ ## Installation
34
+
35
+ ### Requirements
36
+
37
+ - Python >= 3.10
38
+ - OpenAI API key
39
+
40
+ ### Install uv
41
+
42
+ First, install [uv](https://docs.astral.sh/uv/) if you haven't already:
43
+
44
+ ```bash
45
+ curl -LsSf https://astral.sh/uv/install.sh | sh
46
+ ```
47
+
48
+ ### Install from Source
49
+
50
+ Clone the repository and install the package:
51
+
52
+ ```bash
53
+ git clone https://github.com/zeel-04/document-ai.git
54
+ cd document-ai
55
+ uv sync
56
+ ```
57
+
58
+ ### Install from Git (Alternative)
59
+
60
+ You can also install directly from the git repository:
61
+
62
+ ```bash
63
+ uv pip install git+https://github.com/zeel-04/document-ai.git
64
+ ```
65
+
66
+ ## Quick Start
67
+
68
+ Set up your OpenAI API key:
69
+
70
+ ```bash
71
+ echo "OPENAI_API_KEY=your-api-key-here" > .env
72
+ ```
73
+
74
+ Here's a simple example to extract structured data from a PDF:
75
+
76
+ ```python
77
+ from dotenv import load_dotenv
78
+ from document_ai.processer import DocumentProcessor
79
+ from document_ai.llm import OpenAILLM
80
+ from pydantic import BaseModel
81
+
82
+ # Load environment variables
83
+ load_dotenv()
84
+
85
+ # Initialize the LLM
86
+ llm = OpenAILLM()
87
+
88
+ # Create a processor from a PDF file
89
+ processor = DocumentProcessor.from_digital_pdf(
90
+ uri="path/to/your/document.pdf",
91
+ llm=llm,
92
+ )
93
+
94
+ # Define your data model
95
+ class Balance(BaseModel):
96
+ ending_balance: float
97
+
98
+ # Configure extraction with citations
99
+ config = {
100
+ "response_format": Balance,
101
+ "llm_config": {
102
+ "model": "gpt-5",
103
+ "reasoning": {"effort": "minimal"},
104
+ },
105
+ "extraction_config": {
106
+ "include_citations": True,
107
+ "extraction_mode": "single_pass",
108
+ "page_numbers": [0, 1], # Optional: specify which pages to process
109
+ }
110
+ }
111
+
112
+ # Extract structured data
113
+ response = processor.extract(config)
114
+
115
+ # Get the extracted data and citations
116
+ data, citations = response
117
+ print(f"Extracted data: {data}")
118
+ print(f"Citations: {citations}")
119
+ ```
120
+
121
+ ### Sample Output
122
+
123
+ The `extract` method returns a tuple containing the extracted data and citation information:
124
+
125
+ ```python
126
+ (Balance(ending_balance=111.61),
127
+ {'ending_balance': {'value': 111.61,
128
+ 'citations': [{'page': 0,
129
+ 'bboxes': [{'x0': 0.058823529411764705,
130
+ 'top': 0.6095707475757575,
131
+ 'x1': 0.5635455037254902,
132
+ 'bottom': 0.6221969596969696}]}]}})
133
+ ```
134
+
135
+ ## Documentation
136
+
137
+ For more detailed documentation, see the [docs](./docs) directory or visit the [documentation site](https://zeel-04.github.io/document-ai/).
138
+
139
+ ## Development Setup
140
+
141
+ Prerequisites:
142
+
143
+ - Python 3.10+
144
+ - uv
145
+
146
+ ```bash
147
+ git clone https://github.com/zeel-04/document-ai.git
148
+ cd document-ai
149
+ uv venv
150
+ uv sync
151
+ ```
@@ -0,0 +1,133 @@
1
+ # Document AI
2
+
3
+ **Documentation:** [https://zeel-04.github.io/document-ai/](https://zeel-04.github.io/document-ai/)
4
+
5
+ A library for parsing, formatting, and processing documents that can be used to build AI-powered document processing pipelines with structured data extraction and citation support.
6
+
7
+ ## Features
8
+
9
+ - Extract structured data from PDF documents using LLMs
10
+ - Automatic citation tracking with page numbers, line numbers, and bounding boxes
11
+ - Support for digital PDFs
12
+ - Type-safe data models using Pydantic
13
+ - OpenAI integration with support for reasoning models
14
+
15
+ ## Installation
16
+
17
+ ### Requirements
18
+
19
+ - Python >= 3.10
20
+ - OpenAI API key
21
+
22
+ ### Install uv
23
+
24
+ First, install [uv](https://docs.astral.sh/uv/) if you haven't already:
25
+
26
+ ```bash
27
+ curl -LsSf https://astral.sh/uv/install.sh | sh
28
+ ```
29
+
30
+ ### Install from Source
31
+
32
+ Clone the repository and install the package:
33
+
34
+ ```bash
35
+ git clone https://github.com/zeel-04/document-ai.git
36
+ cd document-ai
37
+ uv sync
38
+ ```
39
+
40
+ ### Install from Git (Alternative)
41
+
42
+ You can also install directly from the git repository:
43
+
44
+ ```bash
45
+ uv pip install git+https://github.com/zeel-04/document-ai.git
46
+ ```
47
+
48
+ ## Quick Start
49
+
50
+ Set up your OpenAI API key:
51
+
52
+ ```bash
53
+ echo "OPENAI_API_KEY=your-api-key-here" > .env
54
+ ```
55
+
56
+ Here's a simple example to extract structured data from a PDF:
57
+
58
+ ```python
59
+ from dotenv import load_dotenv
60
+ from document_ai.processer import DocumentProcessor
61
+ from document_ai.llm import OpenAILLM
62
+ from pydantic import BaseModel
63
+
64
+ # Load environment variables
65
+ load_dotenv()
66
+
67
+ # Initialize the LLM
68
+ llm = OpenAILLM()
69
+
70
+ # Create a processor from a PDF file
71
+ processor = DocumentProcessor.from_digital_pdf(
72
+ uri="path/to/your/document.pdf",
73
+ llm=llm,
74
+ )
75
+
76
+ # Define your data model
77
+ class Balance(BaseModel):
78
+ ending_balance: float
79
+
80
+ # Configure extraction with citations
81
+ config = {
82
+ "response_format": Balance,
83
+ "llm_config": {
84
+ "model": "gpt-5",
85
+ "reasoning": {"effort": "minimal"},
86
+ },
87
+ "extraction_config": {
88
+ "include_citations": True,
89
+ "extraction_mode": "single_pass",
90
+ "page_numbers": [0, 1], # Optional: specify which pages to process
91
+ }
92
+ }
93
+
94
+ # Extract structured data
95
+ response = processor.extract(config)
96
+
97
+ # Get the extracted data and citations
98
+ data, citations = response
99
+ print(f"Extracted data: {data}")
100
+ print(f"Citations: {citations}")
101
+ ```
102
+
103
+ ### Sample Output
104
+
105
+ The `extract` method returns a tuple containing the extracted data and citation information:
106
+
107
+ ```python
108
+ (Balance(ending_balance=111.61),
109
+ {'ending_balance': {'value': 111.61,
110
+ 'citations': [{'page': 0,
111
+ 'bboxes': [{'x0': 0.058823529411764705,
112
+ 'top': 0.6095707475757575,
113
+ 'x1': 0.5635455037254902,
114
+ 'bottom': 0.6221969596969696}]}]}})
115
+ ```
116
+
117
+ ## Documentation
118
+
119
+ For more detailed documentation, see the [docs](./docs) directory or visit the [documentation site](https://zeel-04.github.io/document-ai/).
120
+
121
+ ## Development Setup
122
+
123
+ Prerequisites:
124
+
125
+ - Python 3.10+
126
+ - uv
127
+
128
+ ```bash
129
+ git clone https://github.com/zeel-04/document-ai.git
130
+ cd document-ai
131
+ uv venv
132
+ uv sync
133
+ ```
@@ -0,0 +1,5 @@
1
+ # Document AI
2
+
3
+ A library for parsing, formatting, and processing documents that can be used to build AI-powered document processing pipelines.
4
+
5
+ > Please refer to the [quickstart](quickstart.md) for a guide on how to get started.
@@ -0,0 +1,125 @@
1
+ # Quickstart
2
+
3
+ This guide will help you get started with Document AI.
4
+
5
+ ## Installation
6
+
7
+ ## Requirements
8
+
9
+ - Python >= 3.10
10
+ - OpenAI API key
11
+
12
+ ### Install uv
13
+
14
+ First, install [uv](https://docs.astral.sh/uv/) if you haven't already:
15
+
16
+ ```bash
17
+ curl -LsSf https://astral.sh/uv/install.sh | sh
18
+ ```
19
+
20
+ ### Install from Source
21
+
22
+ Clone the repository and install the package:
23
+
24
+ ```bash
25
+ # Clone the repository
26
+ git clone https://github.com/zeel-04/document-ai.git
27
+ cd document-ai
28
+
29
+ # Install the package with uv
30
+ uv sync
31
+ ```
32
+
33
+ ### Install from Git (Alternative)
34
+
35
+ You can also install directly from the git repository:
36
+
37
+ ```bash
38
+ uv pip install git+https://github.com/zeel-04/document-ai.git
39
+ ```
40
+
41
+ ## Environment Setup
42
+
43
+ Document AI uses OpenAI's API for document processing. Set up your API key:
44
+
45
+ ```bash
46
+ # Create a .env file
47
+ echo "OPENAI_API_KEY=your-api-key-here" > .env
48
+ ```
49
+
50
+ ## Basic Usage
51
+
52
+ Here's a simple example to extract structured data from a PDF document:
53
+
54
+ ```python
55
+ from dotenv import load_dotenv
56
+ from document_ai.processer import DocumentProcessor
57
+ from document_ai.llm import OpenAILLM
58
+ from pydantic import BaseModel
59
+
60
+ # Load environment variables
61
+ load_dotenv()
62
+
63
+ # Initialize the LLM
64
+ llm = OpenAILLM()
65
+
66
+ # Create a processor from a PDF file
67
+ processor = DocumentProcessor.from_digital_pdf(
68
+ uri="path/to/your/document.pdf",
69
+ llm=llm,
70
+ )
71
+
72
+ # Define your data model
73
+ class Balance(BaseModel):
74
+ ending_balance: float
75
+
76
+ # Configure extraction with citations
77
+ config = {
78
+ "response_format": Balance,
79
+ "llm_config": {
80
+ "model": "gpt-5",
81
+ "reasoning": {"effort": "minimal"},
82
+ },
83
+ "extraction_config": {
84
+ "include_citations": True,
85
+ "extraction_mode": "single_pass",
86
+ "page_numbers": [0, 1], # Optional: specify which pages to process
87
+ }
88
+ }
89
+
90
+ # Extract structured data
91
+ response = processor.extract(config)
92
+
93
+ # Get the extracted data and citations
94
+ data, citations = response
95
+ print(f"Extracted data: {data}")
96
+ print(f"Citations: {citations}")
97
+ ```
98
+
99
+ ### Sample Output
100
+
101
+ The `extract` method returns a tuple containing:
102
+ 1. The extracted data as a Pydantic model instance
103
+ 2. A dictionary with citation information for each field
104
+
105
+ ```python
106
+ # Example output
107
+ (Balance(ending_balance=111.61),
108
+ {'ending_balance': {'value': 111.61,
109
+ 'citations': [{'page': 0,
110
+ 'bboxes': [{'x0': 0.058823529411764705,
111
+ 'top': 0.6095707475757575,
112
+ 'x1': 0.5635455037254902,
113
+ 'bottom': 0.6221969596969696}]}]}})
114
+ ```
115
+
116
+ ### Configuration Options
117
+
118
+ - **response_format**: Your Pydantic model class
119
+ - **llm_config**:
120
+ - `model`: The OpenAI model to use (e.g., "gpt-5", "gpt-4o")
121
+ - `reasoning`: Optional reasoning configuration with `effort` level ("minimal", "low", "medium", "high")
122
+ - **extraction_config**:
123
+ - `include_citations`: Set to `True` to get citation information
124
+ - `extraction_mode`: "single_pass" for single-pass extraction
125
+ - `page_numbers`: Optional list of page indices to process (0-indexed)
@@ -0,0 +1 @@
1
+ from document_ai.base import BaseFormatter, BaseParser, Document
@@ -0,0 +1,64 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any
3
+
4
+ from langchain_core.output_parsers import JsonOutputParser
5
+
6
+ from .schemas.core import Document, PydanticModel
7
+
8
+
9
class BaseParser(ABC):
    """Interface for turning a raw ``Document`` into a validated Pydantic model."""

    @abstractmethod
    def parse(self, document: Document) -> PydanticModel:
        """Parse *document* and return its structured representation."""
13
+
14
+
15
class BaseFormatter(ABC):
    """Interface for rendering a ``Document`` into text an LLM can consume."""

    @abstractmethod
    def format_document_for_llm(
        self,
        document: Document,
        **kwargs,
    ) -> str:
        """Return *document* serialized as prompt-ready text.

        Extra keyword arguments are formatter-specific (concrete
        implementations define which options they accept).
        """
23
+
24
+
25
class BaseLLM(ABC):
    """Interface for chat-style LLM backends used by the extractors."""

    @abstractmethod
    def generate_structured_output(
        self,
        model: str,
        messages: list[dict[str, str]],
        reasoning: Any,
        output_format: type[PydanticModel],
        openai_text: dict[str, Any] | None = None,
    ) -> PydanticModel | None:
        """Return the model response parsed into *output_format*.

        May return ``None``; the exact failure semantics are defined by the
        concrete backend.
        """

    @abstractmethod
    def generate_text(
        self,
        system_prompt: str,
        user_prompt: str,
        **kwargs,
    ) -> str:
        """Return the raw text completion for the given prompt pair.

        Extra keyword arguments are passed through to the backend
        (e.g. model name, reasoning options).
        """
45
+
46
+
47
class BaseExtractor(ABC):
    """Base class for LLM-backed document extractors.

    Holds the shared state every extractor needs: the LLM backend and a
    JSON parser for the model's text responses.
    """

    def __init__(
        self,
        llm: BaseLLM,
    ):
        self.llm = llm
        self.json_parser = JsonOutputParser()

    @abstractmethod
    def extract(
        self,
        document: Document,
        llm_config: dict[str, Any],
        extraction_config: dict[str, Any],
        formatter: BaseFormatter,
        response_format: type[PydanticModel],
    ) -> tuple[PydanticModel, dict[str, Any] | None]:
        """Extract *response_format*-shaped data from *document*.

        Returns a ``(data, metadata)`` tuple; *metadata* carries citation
        information when available, otherwise ``None``.
        """
@@ -0,0 +1,7 @@
1
# Default configuration keyed by document type. "digital_pdf" holds the
# LLM settings applied when processing digitally-born PDFs.
config = {
    "digital_pdf": {
        "llm": {
            "model": "gpt-5-mini",
        },
    },
}
@@ -0,0 +1,76 @@
1
+ from typing import Any
2
+
3
+ from loguru import logger
4
+
5
+ from .base import BaseExtractor, BaseFormatter
6
+ from .llm import BaseLLM
7
+ from .pydantic_to_json_instance_schema import (
8
+ pydantic_to_json_instance_schema,
9
+ stringify_schema,
10
+ )
11
+ from .schemas.core import Document, PydanticModel
12
+ from .types.pdf import PDFExtractionMode
13
+ from .utils import enrich_citations_with_bboxes, strip_citations
14
+
15
+
16
class DigitalPDFExtractor(BaseExtractor):
    """Extracts structured data from digitally-born PDFs via a single LLM call.

    The document text is rendered by a formatter, combined with a JSON
    instance schema derived from the caller's Pydantic ``response_format``,
    and sent to the LLM. When citations are requested, the LLM's line-level
    citations are enriched with bounding boxes before being stripped from
    the payload.
    """

    def __init__(self, llm: BaseLLM):
        """Store the LLM backend and the fixed prompt templates.

        Args:
            llm: Backend used for text generation.
        """
        super().__init__(llm)

        self.system_prompt = """Act as an expert in the field of document extraction and information extraction from documents."""
        # NOTE: fixed garbled wording ("extract structured mentioned in
        # schema data") in the original prompt.
        self.user_prompt = """Your job is to extract structured data, as described in the schema, from the document given below.

        DOCUMENT:
        {content_text}

        OUTPUT SCHEMA:
        {schema}

        Generate output in JSON format.
        """

    def extract(
        self,
        document: Document,
        llm_config: dict[str, Any],
        extraction_config: dict[str, Any],
        formatter: BaseFormatter,
        response_format: type[PydanticModel],
    ) -> tuple[PydanticModel, dict[str, Any] | None]:
        """Run a single-pass extraction over *document*.

        Args:
            document: Parsed PDF document (carries ``extraction_mode`` and
                ``include_citations`` flags).
            llm_config: Keyword arguments forwarded to the LLM backend
                (model name, reasoning options, ...).
            extraction_config: Options forwarded to the formatter
                (e.g. page selection).
            formatter: Renders the document into prompt text.
            response_format: Pydantic model class describing the output.

        Returns:
            ``(data, metadata)`` where *data* is a populated
            ``response_format`` instance and *metadata* holds citation
            information (or ``None`` when citations are disabled).

        Raises:
            NotImplementedError: For multi-pass extraction mode.
            ValueError: For any other unsupported extraction mode.
        """
        # Fail fast on unsupported modes. Previously this check ran *after*
        # the single-pass branch, so an unrecognized mode skipped the branch
        # and crashed with a NameError on `response` instead of a clear error.
        if document.extraction_mode == PDFExtractionMode.MULTI_PASS:
            raise NotImplementedError("Multi-pass extraction is not implemented yet")
        if document.extraction_mode != PDFExtractionMode.SINGLE_PASS:
            raise ValueError(
                f"Unsupported extraction mode: {document.extraction_mode}"
            )

        # Build a JSON "instance schema" for the caller's model; citation
        # placeholders are injected at line granularity when requested.
        json_instance_schema = stringify_schema(
            pydantic_to_json_instance_schema(
                response_format,
                citation=document.include_citations,
                citation_level="line",
            )
        )
        logger.debug(
            f"DigitalPDFExtractor: extract: json_instance_schema: {json_instance_schema}"
        )
        content_text = formatter.format_document_for_llm(
            document, **extraction_config
        )
        logger.debug(f"DigitalPDFExtractor: extract: content_text: {content_text}")
        user_prompt = self.user_prompt.format(
            content_text=content_text, schema=json_instance_schema
        )

        response = self.llm.generate_text(
            system_prompt=self.system_prompt,
            user_prompt=user_prompt,
            **llm_config,
        )

        response_dict = self.json_parser.parse(response)

        if document.include_citations:
            # Map line-level citations back to page bounding boxes, then
            # strip the citation wrappers so the payload matches the user's
            # response_format model.
            response_metadata = enrich_citations_with_bboxes(response_dict, document)
            response_dict = strip_citations(response_metadata)
        else:
            response_metadata = None

        return response_format(**response_dict), response_metadata