langchain-ocr-lib 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,22 +48,25 @@ def lib_di_config(binder: Binder):
48
48
  langfuse_settings = LangfuseSettings()
49
49
  llm_class_type_settings = LlmClassTypeSettings()
50
50
  language_settings = LanguageSettings()
51
-
51
+ model_name = ""
52
52
  if llm_class_type_settings.llm_type == "ollama":
53
53
  settings = OllamaSettings()
54
+ model_name = settings.model
54
55
  partial_llm_provider = partial(llm_provider,settings, ChatOllama)
55
56
  elif llm_class_type_settings.llm_type == "openai":
56
57
  settings = OpenAISettings()
58
+ model_name = settings.model_name
57
59
  partial_llm_provider = partial(llm_provider,settings, ChatOpenAI)
58
60
  elif llm_class_type_settings.llm_type == "vllm":
59
61
  settings = VllmSettings()
62
+ model_name = settings.model_name
60
63
  partial_llm_provider = partial(llm_provider,settings, ChatOpenAI)
61
64
  else:
62
65
  raise NotImplementedError("Configured LLM is not implemented")
63
66
 
64
67
  binder.bind_to_provider(LargeLanguageModelKey, partial_llm_provider)
65
68
 
66
- prompt = ocr_prompt_template_builder(language=language_settings.language, model_name=settings.model)
69
+ prompt = ocr_prompt_template_builder(language=language_settings.language, model_name=model_name)
67
70
 
68
71
  binder.bind(
69
72
  LangfuseClientKey,
@@ -80,17 +83,19 @@ def lib_di_config(binder: Binder):
80
83
  managed_prompts={
81
84
  OcrChain.__name__: prompt,
82
85
  },
86
+ enabled=langfuse_settings.enabled,
83
87
  ),
84
88
  )
85
89
 
86
- binder.bind(OcrChainKey, OcrChain())
90
+ binder.bind(OcrChainKey if langfuse_settings.enabled else LangfuseTracedChainKey, OcrChain())
87
91
 
88
- binder.bind(
89
- LangfuseTracedChainKey,
90
- LangfuseTracedChain(
91
- settings=langfuse_settings,
92
- ),
93
- )
92
+ if langfuse_settings.enabled:
93
+ binder.bind(
94
+ LangfuseTracedChainKey,
95
+ LangfuseTracedChain(
96
+ settings=langfuse_settings,
97
+ ),
98
+ )
94
99
 
95
100
  binder.bind(PdfConverterKey, Pdf2MarkdownConverter())
96
101
  binder.bind(ImageConverterKey, Image2MarkdownConverter())
@@ -32,8 +32,10 @@ class LangfuseManager:
32
32
  def __init__(
33
33
  self,
34
34
  managed_prompts: dict[str, str],
35
+ enabled: bool = True,
35
36
  ):
36
37
  self._managed_prompts = managed_prompts
38
+ self._enabled = enabled
37
39
 
38
40
  def get_langfuse_prompt(self, base_prompt_name: str) -> Optional[ChatPromptClient]:
39
41
  """
@@ -56,6 +58,10 @@ class LangfuseManager:
56
58
  Exception
57
59
  If an error occurs while retrieving the prompt template from Langfuse.
58
60
  """
61
+ langfuse_prompt = None
62
+ if not self._enabled:
63
+ logger.info("Langfuse is not enabled. Using fallback prompt.")
64
+ return langfuse_prompt
59
65
  try:
60
66
  langfuse_prompt = self._langfuse.get_prompt(base_prompt_name)
61
67
  except NotFoundError:
@@ -72,11 +78,7 @@ class LangfuseManager:
72
78
  )
73
79
  langfuse_prompt = self._langfuse.get_prompt(base_prompt_name)
74
80
  except Exception as error:
75
- logger.error(
76
- "Error occured while getting prompt template from langfuse. Error:\n{error}",
77
- extra={error: error},
78
- )
79
- return None
81
+ logger.error(f"Error occurred while getting prompt template from langfuse. Error:\n{error}")
80
82
  return langfuse_prompt
81
83
 
82
84
  def get_base_llm(self, name: str) -> LLM:
@@ -94,9 +96,12 @@ class LangfuseManager:
94
96
  The base Large Language Model. If the Langfuse prompt is not found,
95
97
  returns the LLM with a fallback configuration.
96
98
  """
99
+ if not self._enabled:
100
+ logger.info("Langfuse is not enabled. Using fallback LLM.")
101
+ return self._llm
97
102
  langfuse_prompt = self.get_langfuse_prompt(name)
98
103
  if not langfuse_prompt:
99
- logger.error("Using fallback for llm")
104
+ logger.warning("Could not retrieve prompt template from langfuse. Using fallback LLM.")
100
105
  return self._llm
101
106
 
102
107
  return self._llm.with_config({"configurable": langfuse_prompt.config})
@@ -121,7 +126,8 @@ class LangfuseManager:
121
126
  """
122
127
  langfuse_prompt = self.get_langfuse_prompt(name)
123
128
  if not langfuse_prompt:
124
- logger.error("Could not retrieve prompt template from langfuse. Using fallback value.")
129
+ if self._enabled:
130
+ logger.warning("Could not retrieve prompt template from langfuse. Using fallback value.")
125
131
  fallback = self._managed_prompts[name]
126
132
  if isinstance(fallback, ChatPromptTemplate):
127
133
  return fallback
@@ -43,6 +43,7 @@ def get_configurable_fields_from(settings: BaseSettings) -> dict[str, Configurab
43
43
  settings_of_interest = settings.model_fields[field_name]
44
44
  if settings_of_interest.title is not None:
45
45
  _fields[field_name] = ConfigurableField(id=field_name, name=settings_of_interest.title)
46
+
46
47
  return _fields
47
48
 
48
49
 
@@ -27,3 +27,7 @@ class LangfuseSettings(BaseSettings):
27
27
  secret_key: str = Field(default="", description="The secret key for Langfuse.")
28
28
  public_key: str = Field(default="", description="The public key for Langfuse.")
29
29
  host: str = Field(default="https://api.langchain.com", description="The host for Langfuse.")
30
+ enabled: bool = Field(
31
+ default=True,
32
+ description="Whether to enable Langfuse. If set to False, Langfuse will not be used.",
33
+ )
@@ -10,7 +10,7 @@ class OpenAISettings(BaseSettings):
10
10
 
11
11
  Attributes
12
12
  ----------
13
- model : str
13
+ model_name : str
14
14
  The model identifier.
15
15
  api_key : str
16
16
  The API key for authentication.
@@ -28,7 +28,12 @@ class OpenAISettings(BaseSettings):
28
28
  env_prefix = "OPENAI_"
29
29
  case_sensitive = False
30
30
 
31
- model: str = Field(default="gpt-4o-mini-search-preview-2025-03-11", description="The model identifier", title="LLM Model")
31
+ model_name: str = Field(
32
+ default="gpt-4o-mini-search-preview-2025-03-11",
33
+ env="MODEL",
34
+ description="The model identifier",
35
+ title="LLM Model",
36
+ )
32
37
  api_key: str = Field(default="", description="The API key for authentication")
33
38
  top_p: float = Field(default=1.0, description="Total probability mass of tokens to consider at each step", title="Top P")
34
39
  temperature: float = Field(default=0, description="What sampling temperature to use", title="Temperature")
@@ -10,7 +10,7 @@ class VllmSettings(BaseSettings):
10
10
 
11
11
  Attributes
12
12
  ----------
13
- model : str
13
+ model_name : str
14
14
  The model identifier.
15
15
  api_key : str
16
16
  The API key for authentication.
@@ -28,7 +28,12 @@ class VllmSettings(BaseSettings):
28
28
  env_prefix = "VLLM_"
29
29
  case_sensitive = False
30
30
 
31
- model: str = Field(default="", description="The model identifier", title="LLM Model")
31
+ model_name: str = Field(
32
+ default="",
33
+ env="MODEL",
34
+ description="The model identifier",
35
+ title="LLM Model",
36
+ )
32
37
  api_key: str = Field(default="", description="The API key for authentication")
33
38
  top_p: float = Field(default=1.0, description="Total probability mass of tokens to consider at each step", title="Top P")
34
39
  temperature: float = Field(default=0, description="What sampling temperature to use", title="Temperature")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: langchain-ocr-lib
3
- Version: 0.3.1
3
+ Version: 0.3.3
4
4
  Summary: Modular, vision-LLM-powered chain to convert image and PDF documents into clean Markdown.
5
5
  License: MIT
6
6
  Author: Andreas Klos
@@ -25,9 +25,13 @@ Requires-Dist: pytest-asyncio (>=0.25.0,<0.26.0)
25
25
  Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
26
26
  Description-Content-Type: text/markdown
27
27
 
28
- # langchain_ocr_lib
28
+ # langchain-ocr-lib
29
29
 
30
- **langchain_ocr_lib** is the OCR processing engine behind LangChain-OCR. It provides a modular, vision-LLM-powered Chain to convert image and PDF documents into clean Markdown. Designed for direct CLI usage or integration into larger applications.
30
+ **langchain-ocr-lib** is the OCR processing engine behind LangChain-OCR. It provides a modular, vision-LLM-powered Chain to convert image and PDF documents into clean Markdown. Designed for direct CLI usage or integration into larger applications.
31
+
32
+ <div align="center">
33
+ <img src="./images/logo.png" alt="Logo" style="width:30%;">
34
+ </div>
31
35
 
32
36
  ## Table of Contents
33
37
 
@@ -137,7 +141,7 @@ class Converter:
137
141
  return self._converter.convert2markdown(filename=filename)
138
142
 
139
143
  converter = Converter()
140
- markdown = converter.convert("../docs/invoice.pdf") # Adjust the file path as needed
144
+ markdown = converter.convert("../examples/invoice.pdf") # Adjust the file path as needed
141
145
  print(markdown)
142
146
  ```
143
147
 
@@ -174,7 +178,7 @@ class Converter:
174
178
  self._converter.convert(filename=filename)
175
179
 
176
180
  converter = Converter()
177
- converter.convert("../docs/invoice.pdf") # Adjust the file path as needed
181
+ converter.convert("../examples/invoice.pdf") # Adjust the file path as needed
178
182
  ```
179
183
 
180
184
  ### 4.3 Docker
@@ -183,6 +187,6 @@ Run OCR via Docker without local Python setup:
183
187
 
184
188
  ```bash
185
189
  docker build -t ocr -f langchain_ocr_lib/Dockerfile .
186
- docker run --net=host -it --rm -v ./docs:/app/docs:ro ocr docs/invoice.png
190
+ docker run --net=host -it --rm -v ./examples:/app/examples:ro ocr examples/invoice.png
187
191
  ```
188
192
 
@@ -5,7 +5,7 @@ langchain_ocr_lib/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
5
5
  langchain_ocr_lib/converter/converter.py,sha256=oDUNzVWD743RgqIal7T4OVv-Z1RKE9uQYzAIPpgY3o8,1280
6
6
  langchain_ocr_lib/di_binding_keys/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  langchain_ocr_lib/di_binding_keys/binding_keys.py,sha256=jE8rwNcLaI0NflIMkK0vu0LVy5o4y0pYgdjbpDNTGyk,338
8
- langchain_ocr_lib/di_config.py,sha256=K11ZHkUDP1TsYzZSRMnrbnroovw-_CCbyxHNo9kjRCw,3640
8
+ langchain_ocr_lib/di_config.py,sha256=MQT1U5TvoTRS8TDWsldPB994XepkJQ4k9XRd6HG4Yps,3935
9
9
  langchain_ocr_lib/impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  langchain_ocr_lib/impl/chains/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  langchain_ocr_lib/impl/chains/ocr_chain.py,sha256=stE8RLE1ieRHf6XHreKCRfhNfXzw9fNLTake7xQBGL8,2673
@@ -13,17 +13,17 @@ langchain_ocr_lib/impl/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
13
13
  langchain_ocr_lib/impl/converter/image_converter.py,sha256=G1rDOCbudWNL4sDvSGJ7CeeFrWUblfWPGaZf5JsnpiM,2871
14
14
  langchain_ocr_lib/impl/converter/pdf_converter.py,sha256=pTHPojuNLCSWJp4FzXBHshXva2sBGyOs6Y7jnKJrnNo,3760
15
15
  langchain_ocr_lib/impl/langfuse_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py,sha256=nfIuOSOsewH6azNNtrsGxSrGI2Blt2fhbp-PBbgXJ2I,4995
16
+ langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py,sha256=6kvPfYObLmbYZAvtYNx7g9xz5wYz0PxohEzyctpGRto,5353
17
17
  langchain_ocr_lib/impl/llms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- langchain_ocr_lib/impl/llms/llm_factory.py,sha256=9DsUdoYNrjeWLGA9ISDdHN2cxcQ7DquNQ5it6zSxHlg,2199
18
+ langchain_ocr_lib/impl/llms/llm_factory.py,sha256=GSXBF5MKqo5zRdGEQE1pKI471dgUOYBRz7vCb_qg-pA,2204
19
19
  langchain_ocr_lib/impl/llms/llm_type.py,sha256=_LKtdVuTRYX6gupkxJtEtIwrbtiMvZmG8WOxfzlm42M,286
20
20
  langchain_ocr_lib/impl/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- langchain_ocr_lib/impl/settings/langfuse_settings.py,sha256=5lr3tVeiHXDUaYtWAnZPXrKxBJgM2wgaz7yyZThhCsE,812
21
+ langchain_ocr_lib/impl/settings/langfuse_settings.py,sha256=QT4_VwYj0msFbgL3qIQ-oer3Lt0qny0FFAyfssGu-q0,962
22
22
  langchain_ocr_lib/impl/settings/language_settings.py,sha256=tdAC1t5wGu1MoH1jhjkDnxnX4Ui7giwxt7Qm8_LPkP8,627
23
23
  langchain_ocr_lib/impl/settings/llm_class_type_settings.py,sha256=4KC6zxby13wn38rB8055J8LNVTsmUfrOiyLtLuToHaM,598
24
24
  langchain_ocr_lib/impl/settings/ollama_chat_settings.py,sha256=YQkgD7CfOjHN5wkpJakO0GfM7-D2GqoJLP1gB2932ms,1525
25
- langchain_ocr_lib/impl/settings/openai_chat_settings.py,sha256=NqVfkcI8OoD8TVxyv4l0G9ycUC6LIs6Qs4kQRL24doA,1331
26
- langchain_ocr_lib/impl/settings/vllm_chat_settings.py,sha256=Zr4L6Urp-f1JZu7Q1dwL6671EQbrIIYL0ubJSQlod3c,1281
25
+ langchain_ocr_lib/impl/settings/openai_chat_settings.py,sha256=W1vPclnXfIDYRfZVMkdGX7ufbbF-cAQiWiKCmjrORO4,1393
26
+ langchain_ocr_lib/impl/settings/vllm_chat_settings.py,sha256=h4TFUt4iMZpZSI7qRNe9EB2n_nSjbSjDxSG6R5D3Ah8,1343
27
27
  langchain_ocr_lib/impl/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  langchain_ocr_lib/impl/tracers/langfuse_traced_chain.py,sha256=syjwNt8HfVmaWXZ-ElFYsc-KwpnKQz2LE3K5jV7c3GE,1599
29
29
  langchain_ocr_lib/language_mapping/language_mapping.py,sha256=VY7WkkZauoHNxkvgUYbig0rDmlKqDkz24cXMd6A7txM,700
@@ -32,7 +32,7 @@ langchain_ocr_lib/prompt_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
32
32
  langchain_ocr_lib/prompt_templates/ocr_prompt.py,sha256=3Be1AL-HJkxPnAP0DNH1MqvAxFWTCeM5UOKP63xkHsY,3543
33
33
  langchain_ocr_lib/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  langchain_ocr_lib/tracers/traced_chain.py,sha256=uxRkdLNn_G6dAsti_gUuF7muhIj10xrOUL7HUga40oc,3056
35
- langchain_ocr_lib-0.3.1.dist-info/METADATA,sha256=3FLR8CPBSpusTzAslaBIC5_4sz27ofyvkDQ3oGl4Nwo,6329
36
- langchain_ocr_lib-0.3.1.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
37
- langchain_ocr_lib-0.3.1.dist-info/entry_points.txt,sha256=l4mIs0tnIgbJYuVveZySQKVBnqNMHS-8ZZtLwz8ag5k,61
38
- langchain_ocr_lib-0.3.1.dist-info/RECORD,,
35
+ langchain_ocr_lib-0.3.3.dist-info/METADATA,sha256=97Ikd1_6Boq7RmZbcBZxjK_Cbns7K2XaLlxIVVFCMS4,6440
36
+ langchain_ocr_lib-0.3.3.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
37
+ langchain_ocr_lib-0.3.3.dist-info/entry_points.txt,sha256=l4mIs0tnIgbJYuVveZySQKVBnqNMHS-8ZZtLwz8ag5k,61
38
+ langchain_ocr_lib-0.3.3.dist-info/RECORD,,