langchain-ocr-lib 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,13 @@
3
3
  from abc import ABC, abstractmethod
4
4
  import inject
5
5
 
6
+ from langchain_ocr_lib.di_binding_keys.binding_keys import LangfuseTracedChainKey
7
+
6
8
 
7
9
  class File2MarkdownConverter(ABC):
8
10
  """Abstract base class for the File2MarkdownConverter class."""
9
11
 
10
- _chain = inject.attr("LangfuseTracedChain")
12
+ _chain = inject.attr(LangfuseTracedChainKey)
11
13
 
12
14
  @abstractmethod
13
15
  async def aconvert2markdown(self, file: bytes) -> str:
File without changes
@@ -0,0 +1,29 @@
1
+ """Define key classes for dependency bindings. More reliable than using strings."""
2
+
3
+
4
+ class LargeLanguageModelKey:
5
+ pass
6
+
7
+
8
+ class LangfuseClientKey:
9
+ pass
10
+
11
+
12
+ class LangfuseManagerKey:
13
+ pass
14
+
15
+
16
+ class OcrChainKey:
17
+ pass
18
+
19
+
20
+ class LangfuseTracedChainKey:
21
+ pass
22
+
23
+
24
+ class PdfConverterKey:
25
+ pass
26
+
27
+
28
+ class ImageConverterKey:
29
+ pass
@@ -2,12 +2,22 @@
2
2
 
3
3
  from inject import Binder
4
4
  import inject
5
+ from langchain_ocr_lib.di_binding_keys.binding_keys import (
6
+ ImageConverterKey,
7
+ LangfuseClientKey,
8
+ LangfuseManagerKey,
9
+ LangfuseTracedChainKey,
10
+ LargeLanguageModelKey,
11
+ OcrChainKey,
12
+ PdfConverterKey,
13
+ )
5
14
  from langchain_ollama import ChatOllama
6
15
  from langchain_openai import ChatOpenAI
7
16
  from langfuse import Langfuse
8
17
 
9
18
  from langchain_ocr_lib.impl.chains.ocr_chain import OcrChain
10
19
  from langchain_ocr_lib.impl.settings.ollama_chat_settings import OllamaSettings
20
+ from langchain_ocr_lib.impl.settings.vllm_chat_settings import VllmSettings
11
21
  from langchain_ocr_lib.impl.settings.openai_chat_settings import OpenAISettings
12
22
  from langchain_ocr_lib.impl.settings.llm_class_type_settings import LlmClassTypeSettings
13
23
  from langchain_ocr_lib.impl.settings.langfuse_settings import LangfuseSettings
@@ -44,14 +54,17 @@ def lib_di_config(binder: Binder):
44
54
  elif llm_class_type_settings.llm_type == "openai":
45
55
  settings = OpenAISettings()
46
56
  llm_instance = llm_provider(settings, ChatOpenAI)
57
+ elif llm_class_type_settings.llm_type == "vllm":
58
+ settings = VllmSettings()
59
+ llm_instance = llm_provider(settings, ChatOpenAI)
47
60
  else:
48
61
  raise NotImplementedError("Configured LLM is not implemented")
49
- binder.bind("LargeLanguageModel", llm_instance)
62
+ binder.bind(LargeLanguageModelKey, llm_instance)
50
63
 
51
64
  prompt = ocr_prompt_template_builder(language=language_settings.language, model_name=settings.model)
52
65
 
53
66
  binder.bind(
54
- "LangfuseClient",
67
+ LangfuseClientKey,
55
68
  Langfuse(
56
69
  public_key=langfuse_settings.public_key,
57
70
  secret_key=langfuse_settings.secret_key,
@@ -60,7 +73,7 @@ def lib_di_config(binder: Binder):
60
73
  )
61
74
 
62
75
  binder.bind(
63
- "LangfuseManager",
76
+ LangfuseManagerKey,
64
77
  LangfuseManager(
65
78
  managed_prompts={
66
79
  OcrChain.__name__: prompt,
@@ -68,17 +81,17 @@ def lib_di_config(binder: Binder):
68
81
  ),
69
82
  )
70
83
 
71
- binder.bind("OcrChain", OcrChain())
84
+ binder.bind(OcrChainKey, OcrChain())
72
85
 
73
86
  binder.bind(
74
- "LangfuseTracedChain",
87
+ LangfuseTracedChainKey,
75
88
  LangfuseTracedChain(
76
89
  settings=langfuse_settings,
77
90
  ),
78
91
  )
79
92
 
80
- binder.bind("PdfConverter", Pdf2MarkdownConverter())
81
- binder.bind("ImageConverter", Image2MarkdownConverter())
93
+ binder.bind(PdfConverterKey, Pdf2MarkdownConverter())
94
+ binder.bind(ImageConverterKey, Image2MarkdownConverter())
82
95
 
83
96
 
84
97
  def configure_di():
@@ -7,6 +7,7 @@ from langchain_core.runnables.utils import Input
7
7
  import inject
8
8
 
9
9
  from langchain_ocr_lib.chains.chain import Chain
10
+ from langchain_ocr_lib.di_binding_keys.binding_keys import LangfuseManagerKey
10
11
 
11
12
  RunnableInput = Input # TODO: adjust properly
12
13
  RunnableOutput = str
@@ -15,7 +16,7 @@ RunnableOutput = str
15
16
  class OcrChain(Chain[RunnableInput, RunnableOutput]):
16
17
  """Base class for LLM answer generation chain."""
17
18
 
18
- _langfuse_manager = inject.attr("LangfuseManager")
19
+ _langfuse_manager = inject.attr(LangfuseManagerKey)
19
20
 
20
21
  def __init__(self):
21
22
  """Initialize the AnswerGenerationChain.
@@ -48,9 +48,11 @@ class Pdf2MarkdownConverter(File2MarkdownConverter):
48
48
  with open(filename, "rb") as f:
49
49
  file = f.read()
50
50
  except Exception as e:
51
- raise ValueError("PDF corrupted or unsupported file type, %s" % e)
52
-
53
- images = convert_from_bytes(file)
51
+ raise ValueError("PDF corrupted or unsupported file type") from e
52
+ try:
53
+ images = convert_from_bytes(file)
54
+ except Exception as e:
55
+ raise ValueError("PDF corrupted or unsupported file type") from e
54
56
 
55
57
  markdown = ""
56
58
  for image in images:
@@ -93,7 +95,10 @@ class Pdf2MarkdownConverter(File2MarkdownConverter):
93
95
  except Exception as e:
94
96
  raise ValueError("PDF corrupted or unsupported file type") from e
95
97
 
96
- images = convert_from_bytes(file)
98
+ try:
99
+ images = convert_from_bytes(file)
100
+ except Exception as e:
101
+ raise ValueError("PDF corrupted or unsupported file type") from e
97
102
 
98
103
  markdown = ""
99
104
  for image in images:
@@ -10,6 +10,9 @@ from langchain_core.language_models.llms import LLM
10
10
  from langfuse.api.resources.commons.errors.not_found_error import NotFoundError
11
11
  from langfuse.model import ChatPromptClient
12
12
 
13
+ from langchain_ocr_lib.di_binding_keys.binding_keys import LangfuseClientKey, LargeLanguageModelKey
14
+
15
+
13
16
  logger = logging.getLogger(__name__)
14
17
 
15
18
 
@@ -23,8 +26,8 @@ class LangfuseManager:
23
26
  """
24
27
 
25
28
  API_KEY_FILTER: str = "api_key"
26
- _llm = inject.attr("LargeLanguageModel")
27
- _langfuse = inject.attr("LangfuseClient")
29
+ _llm = inject.attr(LargeLanguageModelKey)
30
+ _langfuse = inject.attr(LangfuseClientKey)
28
31
 
29
32
  def __init__(
30
33
  self,
@@ -136,12 +139,16 @@ class LangfuseManager:
136
139
  fallback = self._managed_prompts[name]
137
140
  if isinstance(fallback, ChatPromptTemplate):
138
141
  return fallback
139
- if isinstance(fallback, list) and len(fallback) > 0 and isinstance(fallback[0], dict) and "content" in fallback[0]:
142
+ if (
143
+ isinstance(fallback, list)
144
+ and len(fallback) > 0
145
+ and isinstance(fallback[0], dict)
146
+ and "content" in fallback[0]
147
+ ):
140
148
  image_payload = [{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,{image_data}"}}]
141
149
  return ChatPromptTemplate.from_messages([("system", fallback[0]["content"]), ("user", image_payload)])
142
- else:
143
- logger.error("Unexpected structure for fallback prompt.")
144
- raise ValueError("Unexpected structure for fallback prompt.")
150
+ logger.error("Unexpected structure for fallback prompt.")
151
+ raise ValueError("Unexpected structure for fallback prompt.")
145
152
  langchain_prompt = langfuse_prompt.get_langchain_prompt()
146
153
 
147
154
  langchain_prompt[-1] = ("user", json.loads(langchain_prompt[-1][1]))
@@ -9,3 +9,4 @@ class LLMType(StrEnum):
9
9
 
10
10
  OLLAMA = "ollama"
11
11
  OPENAI = "openai"
12
+ VLLM = "vllm"
@@ -18,8 +18,8 @@ class OpenAISettings(BaseSettings):
18
18
  Total probability mass of tokens to consider at each step.
19
19
  temperature : float
20
20
  What sampling temperature to use.
21
- vision_capable : bool
22
- Flag to enable a vision capable model.
21
+ base_url : str
22
+ The base URL for the OpenAI API endpoint.
23
23
  """
24
24
 
25
25
  class Config:
@@ -31,5 +31,8 @@ class OpenAISettings(BaseSettings):
31
31
  model: str = Field(default="gpt-4o-mini-search-preview-2025-03-11", description="The model identifier")
32
32
  api_key: str = Field(default="", description="The API key for authentication")
33
33
  top_p: float = Field(default=1.0, description="Total probability mass of tokens to consider at each step")
34
- temperature: float = Field(default=0.7, description="What sampling temperature to use")
35
- vision_capable: bool = Field(default=False, description="Enable a vision capable model")
34
+ temperature: float = Field(default=0, description="What sampling temperature to use")
35
+ base_url: str = Field(
36
+ default="https://api.openai.com/v1",
37
+ description="The base URL for the OpenAI API endpoint",
38
+ )
@@ -0,0 +1,38 @@
1
+ """Module contains settings regarding the Vllm API."""
2
+
3
+ from pydantic import Field
4
+ from pydantic_settings import BaseSettings
5
+
6
+
7
+ class VllmSettings(BaseSettings):
8
+ """
9
+ Contains settings regarding the Vllm API.
10
+
11
+ Attributes
12
+ ----------
13
+ model : str
14
+ The model identifier.
15
+ api_key : str
16
+ The API key for authentication.
17
+ top_p : float
18
+ Total probability mass of tokens to consider at each step.
19
+ temperature : float
20
+ What sampling temperature to use.
21
+ base_url : str
22
+ The base URL for the Vllm API endpoint.
23
+ """
24
+
25
+ class Config:
26
+ """Config class for reading fields from environment variables."""
27
+
28
+ env_prefix = "VLLM_"
29
+ case_sensitive = False
30
+
31
+ model: str = Field(default="", description="The model identifier")
32
+ api_key: str = Field(default="", description="The API key for authentication")
33
+ top_p: float = Field(default=1.0, description="Total probability mass of tokens to consider at each step")
34
+ temperature: float = Field(default=0, description="What sampling temperature to use")
35
+ base_url: str = Field(
36
+ default="http://localhost:8000/v1",
37
+ description="The base URL for the Vllm API endpoint",
38
+ )
@@ -8,6 +8,7 @@ from langfuse.callback import CallbackHandler
8
8
 
9
9
  from langchain_ocr_lib.impl.settings.langfuse_settings import LangfuseSettings
10
10
  from langchain_ocr_lib.tracers.traced_chain import TracedChain
11
+ from langchain_ocr_lib.di_config import OcrChainKey
11
12
 
12
13
 
13
14
  class LangfuseTracedChain(TracedChain):
@@ -23,7 +24,7 @@ class LangfuseTracedChain(TracedChain):
23
24
  """
24
25
 
25
26
  CONFIG_CALLBACK_KEY = "callbacks"
26
- _inner_chain = inject.attr("OcrChain")
27
+ _inner_chain = inject.attr(OcrChainKey)
27
28
 
28
29
  def __init__(self, settings: LangfuseSettings):
29
30
  super().__init__()
@@ -0,0 +1,188 @@
1
+ Metadata-Version: 2.1
2
+ Name: langchain-ocr-lib
3
+ Version: 0.3.0
4
+ Summary:
5
+ License: MIT
6
+ Author: Andreas Klos
7
+ Author-email: aklos@outlook.de
8
+ Requires-Python: >=3.11,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Requires-Dist: deprecated (>=1.2.14,<2.0.0)
15
+ Requires-Dist: inject (>=5.2.1,<6.0.0)
16
+ Requires-Dist: langchain-community (>=0.3.19,<0.4.0)
17
+ Requires-Dist: langchain-ollama (>=0.2.0,<0.3.0)
18
+ Requires-Dist: langchain-openai (>=0.3.8,<0.4.0)
19
+ Requires-Dist: langfuse (>=2.59.7,<3.0.0)
20
+ Requires-Dist: openai (>=1.42.0,<2.0.0)
21
+ Requires-Dist: pdf2image (>=1.17.0,<2.0.0)
22
+ Requires-Dist: pillow (>=11.0.0,<12.0.0)
23
+ Requires-Dist: pycountry (>=24.6.1,<25.0.0)
24
+ Requires-Dist: pytest-asyncio (>=0.25.0,<0.26.0)
25
+ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
26
+ Description-Content-Type: text/markdown
27
+
28
+ # langchain_ocr_lib
29
+
30
+ **langchain_ocr_lib** is the OCR processing engine behind LangChain-OCR. It provides a modular, vision-LLM-powered Chain to convert image and PDF documents into clean Markdown. Designed for direct CLI usage or integration into larger applications.
31
+
32
+ ## Table of Contents
33
+
34
+ 1. [Overview](#1-overview)
35
+ 2. [Features](#2-features)
36
+ 3. [Installation](#3-installation)
37
+ 1. [Prerequisites](#31-prerequisites)
38
+ 2. [Environment Setup](#32-environment-setup)
39
+ 4. [Usage](#4-usage)
40
+ 1. [CLI](#41-cli)
41
+ 2. [Python Module](#42-python-module)
42
+ 3. [Docker](#43-docker)
43
+ 5. [Architecture](#5-architecture)
44
+ 6. [Testing](#6-testing)
45
+ 7. [License](#7-license)
46
+
47
+ ---
48
+
49
+ ## 1. Overview
50
+
51
+ This package offers the core functionality to extract text from documents using vision LLMs and convert it into Markdown. It is highly configurable via environment variables, and its design is based on dependency injection, which allows you to easily swap out components. The package is designed to be used as a library, but it also provides a command-line interface (CLI) for easy local execution.
52
+
53
+ ---
54
+
55
+ ## 2. Features
56
+
57
+ - **Vision-Language OCR:** Supports Ollama, vLLM and OpenAI (and other OpenAI-compatible providers). Other LLM providers can be easily integrated.
58
+ - **CLI Interface:** Simple local execution via command line or container
59
+ - **Highly Configurable:** Use environment variables to configure the OCR
60
+ - **Dependency Injection:** Easily swap out components for custom implementations
61
+ - **LangChain:** Integrates with LangChain
62
+ - **Markdown Output:** Outputs well-formatted Markdown text
63
+
64
+ ---
65
+
66
+ ## 3. Installation
67
+
68
+ ### 3.1 Prerequisites
69
+
70
+ - **Python:** 3.11+
71
+ - **Poetry:** [Install Poetry](https://python-poetry.org/docs/)
72
+ - **Docker:** For containerized CLI usage (optional)
73
+ - **Ollama:** Follow instructions [here](https://ollama.com) (other LLM providers can be used as well, see [here](#2-features))
74
+ - **Langfuse:** Different options for self-hosting, see [here](https://langfuse.com/self-hosting) (optional, for observability)
75
+
76
+ ### 3.2 Environment Setup
77
+
78
+ The package is published on PyPI, so you can install it directly with pip:
79
+
80
+ ```bash
81
+ pip install langchain-ocr-lib
82
+ ```
83
+ However, if you want to run the latest version or contribute to the project, you can clone the repository and install it locally.
84
+
85
+ ```bash
86
+ git clone https://github.com/a-klos/langchain-ocr.git
87
+ cd langchain-ocr/langchain_ocr_lib
88
+ poetry install --with dev
89
+ ```
90
+
91
+ You can configure the package by setting environment variables. Configuration options are shown in the [`.env.template`](../.env.template) file.
92
+
93
+
94
+ ---
95
+
96
+ ## 4. Usage
97
+
98
+ Remember that you need to pull the configured LLM model first. With Ollama, you can do this with:
99
+ ```bash
100
+ ollama pull <model_name>
101
+ ```
102
+ For example, to pull the `gemma3:4b-it-q4_K_M` model, run:
103
+
104
+ ```bash
105
+ ollama pull gemma3:4b-it-q4_K_M
106
+ ```
107
+
108
+ ### 4.1 CLI
109
+
110
+ Run OCR locally from the terminal:
111
+
112
+ ```bash
113
+ langchain-ocr <<input_file>>
114
+ ```
115
+
116
+ Supports:
117
+ - `.jpg`, `.jpeg`, `.png`, and `.pdf` inputs
118
+
119
+ ### 4.2 Python Module
120
+
121
+ Use the library programmatically:
122
+
123
+ ```python
124
+ import inject
125
+
126
+ import configure_di
127
+ from langchain_ocr_lib.di_config import configure_di
128
+ from langchain_ocr_lib.di_binding_keys.binding_keys import PdfConverterKey
129
+ from langchain_ocr_lib.impl.converter.pdf_converter import Pdf2MarkdownConverter
130
+
131
+
132
+ configure_di() #This sets up the dependency injection
133
+
134
+ class Converter:
135
+ _converter: Pdf2MarkdownConverter = inject.attr(PdfConverterKey)
136
+ def convert(self, filename: str) -> str:
137
+ return self._converter.convert2markdown(filename=filename)
138
+
139
+ converter = Converter()
140
+ markdown = converter.convert("../docs/invoice.pdf") # Adjust the file path as needed
141
+ print(markdown)
142
+ ```
143
+
144
+ The `configure_di()` function sets up the dependency injection for the library. The dependencies can easily be swapped out or extended with new ones. See [../api/src/langchain_ocr/di_config.py](../api/src/langchain_ocr/di_config.py) for more details on how to add new dependencies.
145
+
146
+ Swapping out the dependencies can be done as follows:
147
+
148
+ ```python
149
+ import inject
150
+ from inject import Binder
151
+
152
+ from langchain_ocr_lib.di_config import lib_di_config, PdfConverterKey
153
+ from langchain_ocr_lib.impl.converter.pdf_converter import Pdf2MarkdownConverter
154
+
155
+
156
+ class MyPdfConverter(Pdf2MarkdownConverter):
157
+ def convert(self, filename: str) -> None:
158
+ markdown = self.convert2markdown(filename=filename)
159
+ print(markdown)
160
+
161
+ def _api_specific_config(binder: Binder):
162
+ binder.install(lib_di_config) # Install all default bindings
163
+ binder.bind(PdfConverterKey, MyPdfConverter()) # Then override PdfConverter
164
+
165
+ def configure():
166
+ """Configure the dependency injection container."""
167
+ inject.configure(_api_specific_config, allow_override=True, clear=True)
168
+
169
+ configure()
170
+
171
+ class Converter:
172
+ _converter: MyPdfConverter = inject.attr(PdfConverterKey)
173
+ def convert(self, filename: str) -> None:
174
+ self._converter.convert(filename=filename)
175
+
176
+ converter = Converter()
177
+ converter.convert("../docs/invoice.pdf") # Adjust the file path as needed
178
+ ```
179
+
180
+ ### 4.3 Docker
181
+
182
+ Run OCR via Docker without local Python setup:
183
+
184
+ ```bash
185
+ docker build -t ocr -f langchain_ocr_lib/Dockerfile .
186
+ docker run --net=host -it --rm -v ./docs:/app/docs:ro ocr docs/invoice.png
187
+ ```
188
+
@@ -2,34 +2,37 @@ langchain_ocr_lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
2
2
  langchain_ocr_lib/chains/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  langchain_ocr_lib/chains/chain.py,sha256=D00wnm987YgkJsIAIwQVehX_B4kBOzrjistaPf1M0GE,1946
4
4
  langchain_ocr_lib/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- langchain_ocr_lib/converter/converter.py,sha256=aJuaLX2942d8WRPNaU0cUPO1_266QE7Y6SVKpnxpGBA,1196
6
- langchain_ocr_lib/di_config.py,sha256=H1CxtSlzUH3QGkRFBQqgMGJZx5HGWQ0yrB2kEvFIbOk,3083
5
+ langchain_ocr_lib/converter/converter.py,sha256=oDUNzVWD743RgqIal7T4OVv-Z1RKE9uQYzAIPpgY3o8,1280
6
+ langchain_ocr_lib/di_binding_keys/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ langchain_ocr_lib/di_binding_keys/binding_keys.py,sha256=jE8rwNcLaI0NflIMkK0vu0LVy5o4y0pYgdjbpDNTGyk,338
8
+ langchain_ocr_lib/di_config.py,sha256=eYzDi_LJaYY_JhRnNqW3VYGd3N1QblaGFjWlL_6Vx9c,3537
7
9
  langchain_ocr_lib/impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
10
  langchain_ocr_lib/impl/chains/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- langchain_ocr_lib/impl/chains/ocr_chain.py,sha256=eGiflXVbo1UP56rRHPY6fB4woJtyIvv4SmXNC1RHWFY,2594
11
+ langchain_ocr_lib/impl/chains/ocr_chain.py,sha256=stE8RLE1ieRHf6XHreKCRfhNfXzw9fNLTake7xQBGL8,2673
10
12
  langchain_ocr_lib/impl/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
13
  langchain_ocr_lib/impl/converter/image_converter.py,sha256=G1rDOCbudWNL4sDvSGJ7CeeFrWUblfWPGaZf5JsnpiM,2871
12
- langchain_ocr_lib/impl/converter/pdf_converter.py,sha256=ssj8DL_9wf6kMhjUhDkw0gwSwNLrvgh8nBRspwj60Vk,3510
14
+ langchain_ocr_lib/impl/converter/pdf_converter.py,sha256=pTHPojuNLCSWJp4FzXBHshXva2sBGyOs6Y7jnKJrnNo,3760
13
15
  langchain_ocr_lib/impl/langfuse_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py,sha256=G_qGE_-LnPpNJYgkoDoVqoXYkwsaMkB_HN2uSng3YVA,5245
16
+ langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py,sha256=AEF1iFYghr-62gcPcUb4Yi3DNRYfe-JsIWd3ymsIU8I,5403
15
17
  langchain_ocr_lib/impl/llms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
18
  langchain_ocr_lib/impl/llms/llm_factory.py,sha256=9DsUdoYNrjeWLGA9ISDdHN2cxcQ7DquNQ5it6zSxHlg,2199
17
- langchain_ocr_lib/impl/llms/llm_type.py,sha256=_Ap7yStlBn0tyOyfVLH1c2j2A9-ccsTCxAm7bgoRQnM,268
19
+ langchain_ocr_lib/impl/llms/llm_type.py,sha256=_LKtdVuTRYX6gupkxJtEtIwrbtiMvZmG8WOxfzlm42M,286
18
20
  langchain_ocr_lib/impl/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
21
  langchain_ocr_lib/impl/settings/langfuse_settings.py,sha256=5lr3tVeiHXDUaYtWAnZPXrKxBJgM2wgaz7yyZThhCsE,812
20
22
  langchain_ocr_lib/impl/settings/language_settings.py,sha256=tdAC1t5wGu1MoH1jhjkDnxnX4Ui7giwxt7Qm8_LPkP8,627
21
23
  langchain_ocr_lib/impl/settings/llm_class_type_settings.py,sha256=4KC6zxby13wn38rB8055J8LNVTsmUfrOiyLtLuToHaM,598
22
24
  langchain_ocr_lib/impl/settings/ollama_chat_settings.py,sha256=8RWMsaK4qDrqC6Mrxekr8IEDYwcvjYwhw9xDwZemxI4,1506
23
- langchain_ocr_lib/impl/settings/openai_chat_settings.py,sha256=cXzxe-sea8VCK2M_u9ZIL4l8AR_YdhmA-phZa9fwf8o,1233
25
+ langchain_ocr_lib/impl/settings/openai_chat_settings.py,sha256=gZqmFYDtF0l5lEAnuT2VzdqLWKnTPSK_lTeg7ERmJas,1276
26
+ langchain_ocr_lib/impl/settings/vllm_chat_settings.py,sha256=y8PPNUcce1uA4kEu6p0p5vCwCOGp9uEEvHbCoS1Ohh8,1226
24
27
  langchain_ocr_lib/impl/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- langchain_ocr_lib/impl/tracers/langfuse_traced_chain.py,sha256=lfYLEf9mJ2ie5wofHFG_FUicRi1281XGBC0GKXcAkHM,1546
28
+ langchain_ocr_lib/impl/tracers/langfuse_traced_chain.py,sha256=syjwNt8HfVmaWXZ-ElFYsc-KwpnKQz2LE3K5jV7c3GE,1599
26
29
  langchain_ocr_lib/language_mapping/language_mapping.py,sha256=VY7WkkZauoHNxkvgUYbig0rDmlKqDkz24cXMd6A7txM,700
27
30
  langchain_ocr_lib/main.py,sha256=_kx6pIsIV9pii2_TSYisFT4tKDQHMHef6buWhWoj11E,3485
28
31
  langchain_ocr_lib/prompt_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
32
  langchain_ocr_lib/prompt_templates/ocr_prompt.py,sha256=3Be1AL-HJkxPnAP0DNH1MqvAxFWTCeM5UOKP63xkHsY,3543
30
33
  langchain_ocr_lib/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
34
  langchain_ocr_lib/tracers/traced_chain.py,sha256=uxRkdLNn_G6dAsti_gUuF7muhIj10xrOUL7HUga40oc,3056
32
- langchain_ocr_lib-0.1.0.dist-info/METADATA,sha256=2H3iEatfiflH4GcrFhIw2Cg8wjsgKsLoeP2irFsVTio,991
33
- langchain_ocr_lib-0.1.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
34
- langchain_ocr_lib-0.1.0.dist-info/entry_points.txt,sha256=l4mIs0tnIgbJYuVveZySQKVBnqNMHS-8ZZtLwz8ag5k,61
35
- langchain_ocr_lib-0.1.0.dist-info/RECORD,,
35
+ langchain_ocr_lib-0.3.0.dist-info/METADATA,sha256=IaqIz9OXgu5WQXwEVpLmMNLmz2w3IowWmdZ7kt5O6VM,6240
36
+ langchain_ocr_lib-0.3.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
37
+ langchain_ocr_lib-0.3.0.dist-info/entry_points.txt,sha256=l4mIs0tnIgbJYuVveZySQKVBnqNMHS-8ZZtLwz8ag5k,61
38
+ langchain_ocr_lib-0.3.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.2
2
+ Generator: poetry-core 1.9.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,28 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: langchain-ocr-lib
3
- Version: 0.1.0
4
- Summary:
5
- License: MIT
6
- Author: Andreas Klos
7
- Author-email: aklos@outlook.de
8
- Requires-Python: >=3.11,<4.0
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.11
12
- Classifier: Programming Language :: Python :: 3.12
13
- Classifier: Programming Language :: Python :: 3.13
14
- Requires-Dist: deprecated (>=1.2.14,<2.0.0)
15
- Requires-Dist: inject (>=5.2.1,<6.0.0)
16
- Requires-Dist: langchain-community (>=0.3.19,<0.4.0)
17
- Requires-Dist: langchain-ollama (>=0.2.0,<0.3.0)
18
- Requires-Dist: langchain-openai (>=0.3.8,<0.4.0)
19
- Requires-Dist: langfuse (>=2.59.7,<3.0.0)
20
- Requires-Dist: openai (>=1.42.0,<2.0.0)
21
- Requires-Dist: pdf2image (>=1.17.0,<2.0.0)
22
- Requires-Dist: pillow (>=11.0.0,<12.0.0)
23
- Requires-Dist: pycountry (>=24.6.1,<25.0.0)
24
- Requires-Dist: pytest-asyncio (>=0.25.0,<0.26.0)
25
- Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
26
- Description-Content-Type: text/markdown
27
-
28
-