langchain-ocr-lib 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_ocr_lib/di_config.py +14 -9
- langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py +13 -7
- langchain_ocr_lib/impl/llms/llm_factory.py +1 -0
- langchain_ocr_lib/impl/settings/langfuse_settings.py +4 -0
- langchain_ocr_lib/impl/settings/openai_chat_settings.py +7 -2
- langchain_ocr_lib/impl/settings/vllm_chat_settings.py +7 -2
- {langchain_ocr_lib-0.3.1.dist-info → langchain_ocr_lib-0.3.3.dist-info}/METADATA +10 -6
- {langchain_ocr_lib-0.3.1.dist-info → langchain_ocr_lib-0.3.3.dist-info}/RECORD +10 -10
- {langchain_ocr_lib-0.3.1.dist-info → langchain_ocr_lib-0.3.3.dist-info}/WHEEL +0 -0
- {langchain_ocr_lib-0.3.1.dist-info → langchain_ocr_lib-0.3.3.dist-info}/entry_points.txt +0 -0
langchain_ocr_lib/di_config.py
CHANGED
@@ -48,22 +48,25 @@ def lib_di_config(binder: Binder):
|
|
48
48
|
langfuse_settings = LangfuseSettings()
|
49
49
|
llm_class_type_settings = LlmClassTypeSettings()
|
50
50
|
language_settings = LanguageSettings()
|
51
|
-
|
51
|
+
model_name = ""
|
52
52
|
if llm_class_type_settings.llm_type == "ollama":
|
53
53
|
settings = OllamaSettings()
|
54
|
+
model_name = settings.model
|
54
55
|
partial_llm_provider = partial(llm_provider,settings, ChatOllama)
|
55
56
|
elif llm_class_type_settings.llm_type == "openai":
|
56
57
|
settings = OpenAISettings()
|
58
|
+
model_name = settings.model_name
|
57
59
|
partial_llm_provider = partial(llm_provider,settings, ChatOpenAI)
|
58
60
|
elif llm_class_type_settings.llm_type == "vllm":
|
59
61
|
settings = VllmSettings()
|
62
|
+
model_name = settings.model_name
|
60
63
|
partial_llm_provider = partial(llm_provider,settings, ChatOpenAI)
|
61
64
|
else:
|
62
65
|
raise NotImplementedError("Configured LLM is not implemented")
|
63
66
|
|
64
67
|
binder.bind_to_provider(LargeLanguageModelKey, partial_llm_provider)
|
65
68
|
|
66
|
-
prompt = ocr_prompt_template_builder(language=language_settings.language, model_name=
|
69
|
+
prompt = ocr_prompt_template_builder(language=language_settings.language, model_name=model_name)
|
67
70
|
|
68
71
|
binder.bind(
|
69
72
|
LangfuseClientKey,
|
@@ -80,17 +83,19 @@ def lib_di_config(binder: Binder):
|
|
80
83
|
managed_prompts={
|
81
84
|
OcrChain.__name__: prompt,
|
82
85
|
},
|
86
|
+
enabled=langfuse_settings.enabled,
|
83
87
|
),
|
84
88
|
)
|
85
89
|
|
86
|
-
binder.bind(OcrChainKey, OcrChain())
|
90
|
+
binder.bind(OcrChainKey if langfuse_settings.enabled else LangfuseTracedChainKey, OcrChain())
|
87
91
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
92
|
+
if langfuse_settings.enabled:
|
93
|
+
binder.bind(
|
94
|
+
LangfuseTracedChainKey,
|
95
|
+
LangfuseTracedChain(
|
96
|
+
settings=langfuse_settings,
|
97
|
+
),
|
98
|
+
)
|
94
99
|
|
95
100
|
binder.bind(PdfConverterKey, Pdf2MarkdownConverter())
|
96
101
|
binder.bind(ImageConverterKey, Image2MarkdownConverter())
|
@@ -32,8 +32,10 @@ class LangfuseManager:
|
|
32
32
|
def __init__(
|
33
33
|
self,
|
34
34
|
managed_prompts: dict[str, str],
|
35
|
+
enabled: bool = True,
|
35
36
|
):
|
36
37
|
self._managed_prompts = managed_prompts
|
38
|
+
self._enabled = enabled
|
37
39
|
|
38
40
|
def get_langfuse_prompt(self, base_prompt_name: str) -> Optional[ChatPromptClient]:
|
39
41
|
"""
|
@@ -56,6 +58,10 @@ class LangfuseManager:
|
|
56
58
|
Exception
|
57
59
|
If an error occurs while retrieving the prompt template from Langfuse.
|
58
60
|
"""
|
61
|
+
langfuse_prompt = None
|
62
|
+
if not self._enabled:
|
63
|
+
logger.info("Langfuse is not enabled. Using fallback prompt.")
|
64
|
+
return langfuse_prompt
|
59
65
|
try:
|
60
66
|
langfuse_prompt = self._langfuse.get_prompt(base_prompt_name)
|
61
67
|
except NotFoundError:
|
@@ -72,11 +78,7 @@ class LangfuseManager:
|
|
72
78
|
)
|
73
79
|
langfuse_prompt = self._langfuse.get_prompt(base_prompt_name)
|
74
80
|
except Exception as error:
|
75
|
-
logger.error(
|
76
|
-
"Error occured while getting prompt template from langfuse. Error:\n{error}",
|
77
|
-
extra={error: error},
|
78
|
-
)
|
79
|
-
return None
|
81
|
+
logger.error(f"Error occurred while getting prompt template from langfuse. Error:\n{error}")
|
80
82
|
return langfuse_prompt
|
81
83
|
|
82
84
|
def get_base_llm(self, name: str) -> LLM:
|
@@ -94,9 +96,12 @@ class LangfuseManager:
|
|
94
96
|
The base Large Language Model. If the Langfuse prompt is not found,
|
95
97
|
returns the LLM with a fallback configuration.
|
96
98
|
"""
|
99
|
+
if not self._enabled:
|
100
|
+
logger.info("Langfuse is not enabled. Using fallback LLM.")
|
101
|
+
return self._llm
|
97
102
|
langfuse_prompt = self.get_langfuse_prompt(name)
|
98
103
|
if not langfuse_prompt:
|
99
|
-
logger.
|
104
|
+
logger.warning("Could not retrieve prompt template from langfuse. Using fallback LLM.")
|
100
105
|
return self._llm
|
101
106
|
|
102
107
|
return self._llm.with_config({"configurable": langfuse_prompt.config})
|
@@ -121,7 +126,8 @@ class LangfuseManager:
|
|
121
126
|
"""
|
122
127
|
langfuse_prompt = self.get_langfuse_prompt(name)
|
123
128
|
if not langfuse_prompt:
|
124
|
-
|
129
|
+
if self._enabled:
|
130
|
+
logger.warning("Could not retrieve prompt template from langfuse. Using fallback value.")
|
125
131
|
fallback = self._managed_prompts[name]
|
126
132
|
if isinstance(fallback, ChatPromptTemplate):
|
127
133
|
return fallback
|
@@ -43,6 +43,7 @@ def get_configurable_fields_from(settings: BaseSettings) -> dict[str, Configurab
|
|
43
43
|
settings_of_interest = settings.model_fields[field_name]
|
44
44
|
if settings_of_interest.title is not None:
|
45
45
|
_fields[field_name] = ConfigurableField(id=field_name, name=settings_of_interest.title)
|
46
|
+
|
46
47
|
return _fields
|
47
48
|
|
48
49
|
|
@@ -27,3 +27,7 @@ class LangfuseSettings(BaseSettings):
|
|
27
27
|
secret_key: str = Field(default="", description="The secret key for Langfuse.")
|
28
28
|
public_key: str = Field(default="", description="The public key for Langfuse.")
|
29
29
|
host: str = Field(default="https://api.langchain.com", description="The host for Langfuse.")
|
30
|
+
enabled: bool = Field(
|
31
|
+
default=True,
|
32
|
+
description="Whether to enable Langfuse. If set to False, Langfuse will not be used.",
|
33
|
+
)
|
@@ -10,7 +10,7 @@ class OpenAISettings(BaseSettings):
|
|
10
10
|
|
11
11
|
Attributes
|
12
12
|
----------
|
13
|
-
|
13
|
+
model_name : str
|
14
14
|
The model identifier.
|
15
15
|
api_key : str
|
16
16
|
The API key for authentication.
|
@@ -28,7 +28,12 @@ class OpenAISettings(BaseSettings):
|
|
28
28
|
env_prefix = "OPENAI_"
|
29
29
|
case_sensitive = False
|
30
30
|
|
31
|
-
|
31
|
+
model_name: str = Field(
|
32
|
+
default="gpt-4o-mini-search-preview-2025-03-11",
|
33
|
+
env="MODEL",
|
34
|
+
description="The model identifier",
|
35
|
+
title="LLM Model",
|
36
|
+
)
|
32
37
|
api_key: str = Field(default="", description="The API key for authentication")
|
33
38
|
top_p: float = Field(default=1.0, description="Total probability mass of tokens to consider at each step", title="Top P")
|
34
39
|
temperature: float = Field(default=0, description="What sampling temperature to use", title="Temperature")
|
@@ -10,7 +10,7 @@ class VllmSettings(BaseSettings):
|
|
10
10
|
|
11
11
|
Attributes
|
12
12
|
----------
|
13
|
-
|
13
|
+
model_name : str
|
14
14
|
The model identifier.
|
15
15
|
api_key : str
|
16
16
|
The API key for authentication.
|
@@ -28,7 +28,12 @@ class VllmSettings(BaseSettings):
|
|
28
28
|
env_prefix = "VLLM_"
|
29
29
|
case_sensitive = False
|
30
30
|
|
31
|
-
|
31
|
+
model_name: str = Field(
|
32
|
+
default="",
|
33
|
+
env="MODEL",
|
34
|
+
description="The model identifier",
|
35
|
+
title="LLM Model",
|
36
|
+
)
|
32
37
|
api_key: str = Field(default="", description="The API key for authentication")
|
33
38
|
top_p: float = Field(default=1.0, description="Total probability mass of tokens to consider at each step", title="Top P")
|
34
39
|
temperature: float = Field(default=0, description="What sampling temperature to use", title="Temperature")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: langchain-ocr-lib
|
3
|
-
Version: 0.3.1
|
3
|
+
Version: 0.3.3
|
4
4
|
Summary: Modular, vision-LLM-powered chain to convert image and PDF documents into clean Markdown.
|
5
5
|
License: MIT
|
6
6
|
Author: Andreas Klos
|
@@ -25,9 +25,13 @@ Requires-Dist: pytest-asyncio (>=0.25.0,<0.26.0)
|
|
25
25
|
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
26
26
|
Description-Content-Type: text/markdown
|
27
27
|
|
28
|
-
#
|
28
|
+
# langchain-ocr-lib
|
29
29
|
|
30
|
-
**
|
30
|
+
**langchain-ocr-lib** is the OCR processing engine behind LangChain-OCR. It provides a modular, vision-LLM-powered Chain to convert image and PDF documents into clean Markdown. Designed for direct CLI usage or integration into larger applications.
|
31
|
+
|
32
|
+
<div align="center">
|
33
|
+
<img src="./images/logo.png" alt="Logo" style="width:30%;">
|
34
|
+
</div>
|
31
35
|
|
32
36
|
## Table of Contents
|
33
37
|
|
@@ -137,7 +141,7 @@ class Converter:
|
|
137
141
|
return self._converter.convert2markdown(filename=filename)
|
138
142
|
|
139
143
|
converter = Converter()
|
140
|
-
markdown = converter.convert("../
|
144
|
+
markdown = converter.convert("../examples/invoice.pdf") # Adjust the file path as needed
|
141
145
|
print(markdown)
|
142
146
|
```
|
143
147
|
|
@@ -174,7 +178,7 @@ class Converter:
|
|
174
178
|
self._converter.convert(filename=filename)
|
175
179
|
|
176
180
|
converter = Converter()
|
177
|
-
converter.convert("../
|
181
|
+
converter.convert("../examples/invoice.pdf") # Adjust the file path as needed
|
178
182
|
```
|
179
183
|
|
180
184
|
### 4.3 Docker
|
@@ -183,6 +187,6 @@ Run OCR via Docker without local Python setup:
|
|
183
187
|
|
184
188
|
```bash
|
185
189
|
docker build -t ocr -f langchain_ocr_lib/Dockerfile .
|
186
|
-
docker run --net=host -it --rm -v ./
|
190
|
+
docker run --net=host -it --rm -v ./examples:/app/examples:ro ocr examples/invoice.png
|
187
191
|
```
|
188
192
|
|
@@ -5,7 +5,7 @@ langchain_ocr_lib/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
|
|
5
5
|
langchain_ocr_lib/converter/converter.py,sha256=oDUNzVWD743RgqIal7T4OVv-Z1RKE9uQYzAIPpgY3o8,1280
|
6
6
|
langchain_ocr_lib/di_binding_keys/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
langchain_ocr_lib/di_binding_keys/binding_keys.py,sha256=jE8rwNcLaI0NflIMkK0vu0LVy5o4y0pYgdjbpDNTGyk,338
|
8
|
-
langchain_ocr_lib/di_config.py,sha256=
|
8
|
+
langchain_ocr_lib/di_config.py,sha256=MQT1U5TvoTRS8TDWsldPB994XepkJQ4k9XRd6HG4Yps,3935
|
9
9
|
langchain_ocr_lib/impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
langchain_ocr_lib/impl/chains/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
langchain_ocr_lib/impl/chains/ocr_chain.py,sha256=stE8RLE1ieRHf6XHreKCRfhNfXzw9fNLTake7xQBGL8,2673
|
@@ -13,17 +13,17 @@ langchain_ocr_lib/impl/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
13
13
|
langchain_ocr_lib/impl/converter/image_converter.py,sha256=G1rDOCbudWNL4sDvSGJ7CeeFrWUblfWPGaZf5JsnpiM,2871
|
14
14
|
langchain_ocr_lib/impl/converter/pdf_converter.py,sha256=pTHPojuNLCSWJp4FzXBHshXva2sBGyOs6Y7jnKJrnNo,3760
|
15
15
|
langchain_ocr_lib/impl/langfuse_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py,sha256=
|
16
|
+
langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py,sha256=6kvPfYObLmbYZAvtYNx7g9xz5wYz0PxohEzyctpGRto,5353
|
17
17
|
langchain_ocr_lib/impl/llms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
langchain_ocr_lib/impl/llms/llm_factory.py,sha256=
|
18
|
+
langchain_ocr_lib/impl/llms/llm_factory.py,sha256=GSXBF5MKqo5zRdGEQE1pKI471dgUOYBRz7vCb_qg-pA,2204
|
19
19
|
langchain_ocr_lib/impl/llms/llm_type.py,sha256=_LKtdVuTRYX6gupkxJtEtIwrbtiMvZmG8WOxfzlm42M,286
|
20
20
|
langchain_ocr_lib/impl/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
-
langchain_ocr_lib/impl/settings/langfuse_settings.py,sha256=
|
21
|
+
langchain_ocr_lib/impl/settings/langfuse_settings.py,sha256=QT4_VwYj0msFbgL3qIQ-oer3Lt0qny0FFAyfssGu-q0,962
|
22
22
|
langchain_ocr_lib/impl/settings/language_settings.py,sha256=tdAC1t5wGu1MoH1jhjkDnxnX4Ui7giwxt7Qm8_LPkP8,627
|
23
23
|
langchain_ocr_lib/impl/settings/llm_class_type_settings.py,sha256=4KC6zxby13wn38rB8055J8LNVTsmUfrOiyLtLuToHaM,598
|
24
24
|
langchain_ocr_lib/impl/settings/ollama_chat_settings.py,sha256=YQkgD7CfOjHN5wkpJakO0GfM7-D2GqoJLP1gB2932ms,1525
|
25
|
-
langchain_ocr_lib/impl/settings/openai_chat_settings.py,sha256=
|
26
|
-
langchain_ocr_lib/impl/settings/vllm_chat_settings.py,sha256=
|
25
|
+
langchain_ocr_lib/impl/settings/openai_chat_settings.py,sha256=W1vPclnXfIDYRfZVMkdGX7ufbbF-cAQiWiKCmjrORO4,1393
|
26
|
+
langchain_ocr_lib/impl/settings/vllm_chat_settings.py,sha256=h4TFUt4iMZpZSI7qRNe9EB2n_nSjbSjDxSG6R5D3Ah8,1343
|
27
27
|
langchain_ocr_lib/impl/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
28
|
langchain_ocr_lib/impl/tracers/langfuse_traced_chain.py,sha256=syjwNt8HfVmaWXZ-ElFYsc-KwpnKQz2LE3K5jV7c3GE,1599
|
29
29
|
langchain_ocr_lib/language_mapping/language_mapping.py,sha256=VY7WkkZauoHNxkvgUYbig0rDmlKqDkz24cXMd6A7txM,700
|
@@ -32,7 +32,7 @@ langchain_ocr_lib/prompt_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
|
|
32
32
|
langchain_ocr_lib/prompt_templates/ocr_prompt.py,sha256=3Be1AL-HJkxPnAP0DNH1MqvAxFWTCeM5UOKP63xkHsY,3543
|
33
33
|
langchain_ocr_lib/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
34
|
langchain_ocr_lib/tracers/traced_chain.py,sha256=uxRkdLNn_G6dAsti_gUuF7muhIj10xrOUL7HUga40oc,3056
|
35
|
-
langchain_ocr_lib-0.3.
|
36
|
-
langchain_ocr_lib-0.3.
|
37
|
-
langchain_ocr_lib-0.3.
|
38
|
-
langchain_ocr_lib-0.3.
|
35
|
+
langchain_ocr_lib-0.3.3.dist-info/METADATA,sha256=97Ikd1_6Boq7RmZbcBZxjK_Cbns7K2XaLlxIVVFCMS4,6440
|
36
|
+
langchain_ocr_lib-0.3.3.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
37
|
+
langchain_ocr_lib-0.3.3.dist-info/entry_points.txt,sha256=l4mIs0tnIgbJYuVveZySQKVBnqNMHS-8ZZtLwz8ag5k,61
|
38
|
+
langchain_ocr_lib-0.3.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|