langchain-ocr-lib 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. langchain_ocr_lib/__init__.py +0 -0
  2. langchain_ocr_lib/chains/__init__.py +0 -0
  3. langchain_ocr_lib/chains/chain.py +55 -0
  4. langchain_ocr_lib/converter/__init__.py +0 -0
  5. langchain_ocr_lib/converter/converter.py +52 -0
  6. langchain_ocr_lib/di_config.py +86 -0
  7. langchain_ocr_lib/impl/__init__.py +0 -0
  8. langchain_ocr_lib/impl/chains/__init__.py +0 -0
  9. langchain_ocr_lib/impl/chains/ocr_chain.py +86 -0
  10. langchain_ocr_lib/impl/converter/__init__.py +0 -0
  11. langchain_ocr_lib/impl/converter/image_converter.py +88 -0
  12. langchain_ocr_lib/impl/converter/pdf_converter.py +105 -0
  13. langchain_ocr_lib/impl/langfuse_manager/__init__.py +0 -0
  14. langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py +149 -0
  15. langchain_ocr_lib/impl/llms/__init__.py +0 -0
  16. langchain_ocr_lib/impl/llms/llm_factory.py +66 -0
  17. langchain_ocr_lib/impl/llms/llm_type.py +11 -0
  18. langchain_ocr_lib/impl/settings/__init__.py +0 -0
  19. langchain_ocr_lib/impl/settings/langfuse_settings.py +29 -0
  20. langchain_ocr_lib/impl/settings/language_settings.py +25 -0
  21. langchain_ocr_lib/impl/settings/llm_class_type_settings.py +27 -0
  22. langchain_ocr_lib/impl/settings/ollama_chat_settings.py +42 -0
  23. langchain_ocr_lib/impl/settings/openai_chat_settings.py +35 -0
  24. langchain_ocr_lib/impl/tracers/__init__.py +0 -0
  25. langchain_ocr_lib/impl/tracers/langfuse_traced_chain.py +44 -0
  26. langchain_ocr_lib/language_mapping/language_mapping.py +19 -0
  27. langchain_ocr_lib/main.py +122 -0
  28. langchain_ocr_lib/prompt_templates/__init__.py +0 -0
  29. langchain_ocr_lib/prompt_templates/ocr_prompt.py +60 -0
  30. langchain_ocr_lib/tracers/__init__.py +0 -0
  31. langchain_ocr_lib/tracers/traced_chain.py +88 -0
  32. langchain_ocr_lib-0.1.0.dist-info/METADATA +28 -0
  33. langchain_ocr_lib-0.1.0.dist-info/RECORD +35 -0
  34. langchain_ocr_lib-0.1.0.dist-info/WHEEL +4 -0
  35. langchain_ocr_lib-0.1.0.dist-info/entry_points.txt +3 -0
File without changes
File without changes
@@ -0,0 +1,55 @@
1
+ """Module for the base class of chains."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Optional
5
+
6
+ from langchain_core.runnables import Runnable, RunnableConfig
7
+ from langchain_core.runnables.utils import Input, Output
8
+
9
+
10
class Chain(Runnable[Input, Output], ABC):
    """Abstract interface shared by the chains of this package.

    A chain is a LangChain ``Runnable`` that has to provide both an
    asynchronous (``ainvoke``) and a synchronous (``invoke``) entry point.
    Neither method has a body here; concrete subclasses supply them.
    """

    @abstractmethod
    async def ainvoke(self, chain_input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any) -> Output:
        """Run the chain asynchronously.

        Parameters
        ----------
        chain_input : Input
            The data the chain should process.
        config : Optional[RunnableConfig], optional
            Runnable configuration for this invocation, by default None.
        **kwargs : Any
            Extra keyword arguments an implementation may accept.

        Returns
        -------
        Output
            Whatever the concrete chain produces for ``chain_input``.
        """

    @abstractmethod
    def invoke(self, chain_input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any) -> Output:
        """Run the chain synchronously.

        Notes
        -----
        The base class provides no implementation; this declaration only
        fixes the signature that subclasses have to honour.

        Parameters
        ----------
        chain_input : Input
            The data the chain should process.
        config : Optional[RunnableConfig], optional
            Runnable configuration for this invocation, by default None.
        **kwargs : Any
            Extra keyword arguments an implementation may accept.

        Returns
        -------
        Output
            Whatever the concrete chain produces for ``chain_input``.
        """
File without changes
@@ -0,0 +1,52 @@
1
+ """Module for the File2MarkdownConverter class."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ import inject
5
+
6
+
7
class File2MarkdownConverter(ABC):
    """Abstract base class for converters that turn a file into markdown.

    Implementations receive the document either as in-memory data (``file``)
    or as a path (``filename``). Both arguments are optional in the signature
    so that the base contract matches the concrete converters, which are
    called with ``file=None, filename=...`` as well as with data only.
    """

    # Chain used by the concrete converters; resolved lazily by ``inject``
    # from the binding registered under the key "LangfuseTracedChain".
    _chain = inject.attr("LangfuseTracedChain")

    @abstractmethod
    async def aconvert2markdown(self, file: bytes | None = None, filename: str | None = None) -> str:
        """Asynchronously convert a file to markdown format.

        Parameters
        ----------
        file : bytes | None, optional
            The in-memory file to convert, by default None. Subclasses may
            accept a richer in-memory type than raw bytes.
        filename : str | None, optional
            Path of the file to convert, used when ``file`` is not given,
            by default None.

        Returns
        -------
        str
            The markdown representation of the file.

        Raises
        ------
        NotImplementedError
            If a subclass delegates to this base implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def convert2markdown(self, file: bytes | None = None, filename: str | None = None) -> str:
        """Convert a file to markdown format.

        Parameters
        ----------
        file : bytes | None, optional
            The in-memory file to convert, by default None. Subclasses may
            accept a richer in-memory type than raw bytes.
        filename : str | None, optional
            Path of the file to convert, used when ``file`` is not given,
            by default None.

        Returns
        -------
        str
            The markdown representation of the file.

        Raises
        ------
        NotImplementedError
            If a subclass delegates to this base implementation.
        """
        raise NotImplementedError
@@ -0,0 +1,86 @@
1
+ """Module containing the dependency injection container for managing application dependencies."""
2
+
3
+ from inject import Binder
4
+ import inject
5
+ from langchain_ollama import ChatOllama
6
+ from langchain_openai import ChatOpenAI
7
+ from langfuse import Langfuse
8
+
9
+ from langchain_ocr_lib.impl.chains.ocr_chain import OcrChain
10
+ from langchain_ocr_lib.impl.settings.ollama_chat_settings import OllamaSettings
11
+ from langchain_ocr_lib.impl.settings.openai_chat_settings import OpenAISettings
12
+ from langchain_ocr_lib.impl.settings.llm_class_type_settings import LlmClassTypeSettings
13
+ from langchain_ocr_lib.impl.settings.langfuse_settings import LangfuseSettings
14
+ from langchain_ocr_lib.impl.settings.language_settings import LanguageSettings
15
+ from langchain_ocr_lib.impl.tracers.langfuse_traced_chain import LangfuseTracedChain
16
+ from langchain_ocr_lib.prompt_templates.ocr_prompt import ocr_prompt_template_builder
17
+ from langchain_ocr_lib.impl.llms.llm_factory import llm_provider
18
+ from langchain_ocr_lib.impl.langfuse_manager.langfuse_manager import LangfuseManager
19
+ from langchain_ocr_lib.impl.converter.pdf_converter import Pdf2MarkdownConverter
20
+ from langchain_ocr_lib.impl.converter.image_converter import Image2MarkdownConverter
21
+
22
+
23
def lib_di_config(binder: Binder):
    """Register every binding the OCR library needs on ``binder``.

    The LLM implementation is chosen from ``LlmClassTypeSettings.llm_type``;
    afterwards the Langfuse client, the prompt manager, the OCR chain, the
    traced chain and both converters are bound under their string keys.

    Parameters
    ----------
    binder : Binder
        The dependency injection binder instance used to register the bindings.

    Raises
    ------
    NotImplementedError
        If the configured LLM type is not implemented.

    """
    langfuse_settings = LangfuseSettings()
    llm_class_type_settings = LlmClassTypeSettings()
    language_settings = LanguageSettings()

    # (llm type, settings class, chat model class) for each supported backend.
    supported_llms = (
        ("ollama", OllamaSettings, ChatOllama),
        ("openai", OpenAISettings, ChatOpenAI),
    )
    selected = next(
        (entry for entry in supported_llms if llm_class_type_settings.llm_type == entry[0]),
        None,
    )
    if selected is None:
        raise NotImplementedError("Configured LLM is not implemented")

    _, settings_cls, llm_cls = selected
    settings = settings_cls()
    llm_instance = llm_provider(settings, llm_cls)
    binder.bind("LargeLanguageModel", llm_instance)

    prompt = ocr_prompt_template_builder(language=language_settings.language, model_name=settings.model)

    langfuse_client = Langfuse(
        public_key=langfuse_settings.public_key,
        secret_key=langfuse_settings.secret_key,
        host=langfuse_settings.host,
    )
    binder.bind("LangfuseClient", langfuse_client)

    # The prompt is registered under the chain's class name, which is the
    # same key OcrChain uses when it asks the manager for prompt and LLM.
    langfuse_manager = LangfuseManager(managed_prompts={OcrChain.__name__: prompt})
    binder.bind("LangfuseManager", langfuse_manager)

    binder.bind("OcrChain", OcrChain())

    traced_chain = LangfuseTracedChain(settings=langfuse_settings)
    binder.bind("LangfuseTracedChain", traced_chain)

    binder.bind("PdfConverter", Pdf2MarkdownConverter())
    binder.bind("ImageConverter", Image2MarkdownConverter())
82
+
83
+
84
def configure_di():
    """Install ``lib_di_config`` as the active `inject` configuration.

    Overrides are allowed and any previously installed injector is cleared,
    so the function can be called more than once.
    """
    inject.configure(
        lib_di_config,
        allow_override=True,
        clear=True,
    )
File without changes
File without changes
@@ -0,0 +1,86 @@
1
+ """Module for LLM answer generation chain."""
2
+
3
+ from typing import Any, Optional
4
+
5
+ from langchain_core.runnables import Runnable, RunnableConfig
6
+ from langchain_core.runnables.utils import Input
7
+ import inject
8
+
9
+ from langchain_ocr_lib.chains.chain import Chain
10
+
11
+ RunnableInput = Input # TODO: adjust properly
12
+ RunnableOutput = str
13
+
14
+
15
class OcrChain(Chain[RunnableInput, RunnableOutput]):
    """Chain that pipes the managed OCR prompt into the managed LLM.

    Prompt and LLM are both obtained from the injected ``LangfuseManager``
    under this class's name, and the pipeline is rebuilt on every call.
    """

    # Resolved lazily by ``inject`` from the "LangfuseManager" binding.
    _langfuse_manager = inject.attr("LangfuseManager")

    def __init__(self):
        """Initialize the OcrChain.

        The constructor takes no arguments and performs no work; the
        Langfuse manager is provided through attribute injection.
        """

    async def ainvoke(
        self, chain_input: RunnableInput, config: Optional[RunnableConfig] = None, **kwargs: Any
    ) -> RunnableOutput:
        """
        Asynchronously run the prompt | LLM pipeline on the given input.

        Parameters
        ----------
        chain_input : RunnableInput
            The input to be processed by the chain.
        config : Optional[RunnableConfig]
            Configuration for the chain execution (default None).
        **kwargs : Any
            Accepted for interface compatibility; not forwarded to the pipeline.

        Returns
        -------
        RunnableOutput
            The output generated by the chain.
        """
        pipeline = self._create_chain()
        return await pipeline.ainvoke(chain_input, config=config)

    def invoke(
        self, chain_input: RunnableInput, config: Optional[RunnableConfig] = None, **kwargs: Any
    ) -> RunnableOutput:
        """
        Run the prompt | LLM pipeline on the given input.

        Parameters
        ----------
        chain_input : RunnableInput
            The input to be processed by the chain.
        config : Optional[RunnableConfig]
            Configuration for the chain execution (default None).
        **kwargs : Any
            Accepted for interface compatibility; not forwarded to the pipeline.

        Returns
        -------
        RunnableOutput
            The output generated by the chain.
        """
        pipeline = self._create_chain()
        return pipeline.invoke(chain_input, config=config)

    def _create_chain(self) -> Runnable:
        """Build the runnable by piping the base prompt into the base LLM."""
        prompt_key = self.__class__.__name__
        base_prompt = self._langfuse_manager.get_base_prompt(prompt_key)
        base_llm = self._langfuse_manager.get_base_llm(prompt_key)
        return base_prompt | base_llm
File without changes
@@ -0,0 +1,88 @@
1
+ """Module for converting an image to markdown using a Langchain chain."""
2
+
3
+ import io
4
+ import base64
5
+ from PIL import Image
6
+ from PIL.ImageFile import ImageFile
7
+
8
+ from langchain_ocr_lib.converter.converter import File2MarkdownConverter
9
+
10
+
11
class Image2MarkdownConverter(File2MarkdownConverter):
    """Converts an image to markdown using a Langchain chain.

    The image is re-encoded as PNG, base64-encoded and passed to the inherited
    ``_chain`` under the ``image_data`` key. The ``content`` attribute of the
    chain's response is returned as the markdown text.
    """

    @staticmethod
    def _encode_image(file: ImageFile | None, filename: str | None) -> str:
        """Return the image as a base64-encoded PNG string.

        Parameters
        ----------
        file : ImageFile | None
            PIL image to encode. Takes precedence over ``filename``.
        filename : str | None
            Path of the image to open when ``file`` is None.

        Returns
        -------
        str
            Base64 text of the PNG-encoded image.

        Raises
        ------
        ValueError
            If neither argument is given, or if the image cannot be opened.
        """
        if file is None and filename is None:
            raise ValueError("No file provided")

        opened_here = file is None
        if opened_here:
            try:
                file = Image.open(filename)
            except Exception as e:
                # Keep the original error as the explicit cause.
                raise ValueError("Image corrupted or unsupported file type, %s" % e) from e

        try:
            buf = io.BytesIO()
            file.save(buf, format="PNG")
        finally:
            # Only release images this method opened; a caller-supplied image
            # stays open because the caller owns it.
            if opened_here:
                file.close()
        return base64.b64encode(buf.getvalue()).decode("utf-8")

    async def aconvert2markdown(self, file: ImageFile | None = None, filename: str | None = None) -> str:
        """
        Asynchronously converts an image to markdown using a Langchain chain.

        Parameters
        ----------
        file : ImageFile | None, optional
            PIL Image object to convert, by default None
        filename : str | None, optional
            Path to the image file to convert, by default None

        Returns
        -------
        str
            Markdown representation of the image.

        Raises
        ------
        ValueError
            If no file or filename is provided.
        ValueError
            If the file is corrupted or the file type is unsupported.
        """
        base64_img = self._encode_image(file, filename)
        response = await self._chain.ainvoke({"image_data": base64_img})

        return response.content

    def convert2markdown(self, file: ImageFile | None = None, filename: str | None = None) -> str:
        """
        Convert an image to markdown using a Langchain chain.

        Parameters
        ----------
        file : ImageFile | None, optional
            PIL Image object to convert, by default None
        filename : str | None, optional
            Path to the image file to convert, by default None

        Returns
        -------
        str
            Markdown representation of the image.

        Raises
        ------
        ValueError
            If no file or filename is provided.
        ValueError
            If the file is corrupted or the file type is unsupported.
        """
        base64_img = self._encode_image(file, filename)
        response = self._chain.invoke({"image_data": base64_img})

        return response.content
@@ -0,0 +1,105 @@
1
+ """Module for converting PDF files to markdown."""
2
+
3
+ from pdf2image import convert_from_bytes
4
+ import io
5
+ import base64
6
+
7
+ from langchain_ocr_lib.converter.converter import File2MarkdownConverter
8
+
9
+
10
class Pdf2MarkdownConverter(File2MarkdownConverter):
    """Converts PDF files to markdown format.

    The PDF (given as bytes or by filename) is rendered to one image per page
    with ``convert_from_bytes``. Each page image is sent to the inherited
    ``_chain`` as a base64-encoded PNG, and the ``content`` of the responses
    is concatenated in page order.
    """

    @staticmethod
    def _page_to_base64(image) -> str:
        """Return one rendered page image as a base64-encoded PNG string."""
        buf = io.BytesIO()
        image.save(buf, format="PNG")
        return base64.b64encode(buf.getvalue()).decode("utf-8")

    async def aconvert2markdown(self, file: bytes | None = None, filename: str | None = None) -> str:
        """Asynchronously converts a PDF file (either provided as bytes or by filename) into markdown.

        Parameters
        ----------
        file : bytes, optional
            The PDF file as bytes. Defaults to None.
        filename : str, optional
            The path to the PDF file. Defaults to None.

        Returns
        -------
        str
            The markdown representation of the PDF content extracted via OCR.

        Raises
        ------
        ValueError
            If neither `file` nor `filename` is provided.
        ValueError
            If the file given by `filename` cannot be read.
        """
        if file is None and filename is None:
            raise ValueError("No file provided")
        if file is None:
            try:
                with open(filename, "rb") as f:
                    file = f.read()
            except Exception as e:
                # Chain the cause explicitly, as the synchronous variant does.
                raise ValueError("PDF corrupted or unsupported file type, %s" % e) from e

        images = convert_from_bytes(file)

        # Pages are processed one after another so the output keeps page order.
        pages = []
        for image in images:
            response = await self._chain.ainvoke({"image_data": self._page_to_base64(image)})
            pages.append(response.content)
        return "".join(pages)

    def convert2markdown(self, file: bytes | None = None, filename: str | None = None) -> str:
        """Convert a PDF file (either provided as bytes or by filename) into markdown.

        Parameters
        ----------
        file : bytes, optional
            The PDF file as bytes. Defaults to None.
        filename : str, optional
            The path to the PDF file. Defaults to None.

        Returns
        -------
        str
            The markdown representation of the PDF content extracted via OCR.

        Raises
        ------
        ValueError
            If neither `file` nor `filename` is provided.
        ValueError
            If the file given by `filename` cannot be read.
        """
        if file is None and filename is None:
            raise ValueError("No file provided")
        if file is None:
            try:
                with open(filename, "rb") as f:
                    file = f.read()
            except Exception as e:
                raise ValueError("PDF corrupted or unsupported file type") from e

        images = convert_from_bytes(file)

        pages = []
        for image in images:
            response = self._chain.invoke({"image_data": self._page_to_base64(image)})
            pages.append(response.content)
        return "".join(pages)
File without changes
@@ -0,0 +1,149 @@
1
+ # spell-checker: disable
2
+ """Module for managing Langfuse prompts and Langfuse Language Models (LLMs)."""
3
+ import logging
4
+ from typing import Optional
5
+ import inject
6
+ import json
7
+
8
+ from langchain.prompts import ChatPromptTemplate
9
+ from langchain_core.language_models.llms import LLM
10
+ from langfuse.api.resources.commons.errors.not_found_error import NotFoundError
11
+ from langfuse.model import ChatPromptClient
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class LangfuseManager:
    """Manage prompts using Langfuse and a Large Language Model (LLM).

    Attributes
    ----------
    API_KEY_FILTER : str
        A filter string used to exclude the API key from configurations.
    """

    API_KEY_FILTER: str = "api_key"
    # Both collaborators are resolved lazily by ``inject`` from string keys.
    _llm = inject.attr("LargeLanguageModel")
    _langfuse = inject.attr("LangfuseClient")

    def __init__(
        self,
        managed_prompts: dict[str, object],
    ):
        """Store the fallback prompts, keyed by prompt name.

        Parameters
        ----------
        managed_prompts : dict[str, object]
            Prompt name -> prompt definition. The definition is sent to
            Langfuse when the prompt does not exist there yet and is used as
            the local fallback by ``get_base_prompt``, which understands a
            ``ChatPromptTemplate`` or a non-empty list of message dicts whose
            first entry has a ``"content"`` key.
        """
        self._managed_prompts = managed_prompts

    def init_prompts(self) -> None:
        """
        Initialize the prompts managed by the LangfuseManager.

        Calls ``get_langfuse_prompt`` once for every managed prompt name,
        which creates the prompt in Langfuse if it is missing.

        Returns
        -------
        None
        """
        for key in self._managed_prompts:
            self.get_langfuse_prompt(key)

    def get_langfuse_prompt(self, base_prompt_name: str) -> Optional[ChatPromptClient]:
        """
        Retrieve the prompt from Langfuse Prompt Management.

        If Langfuse reports the prompt as not found, it is created from the
        managed fallback prompt (type "chat", label "production") together
        with the LLM's configurable defaults, and then fetched again.

        Parameters
        ----------
        base_prompt_name : str
            The name of the base prompt to retrieve.

        Returns
        -------
        Optional[ChatPromptClient]
            The Langfuse prompt if it could be retrieved. None if the first
            lookup failed with anything other than ``NotFoundError``.

        Notes
        -----
        Errors raised while creating or re-fetching a missing prompt are not
        caught here and propagate to the caller.
        """
        try:
            langfuse_prompt = self._langfuse.get_prompt(base_prompt_name)
        except NotFoundError:
            logger.info("Prompt not found in LangFuse. Creating new.")
            # Never send anything that looks like an API key to Langfuse.
            llm_configurable_configs = {
                config.id: config.default for config in self._llm.config_specs if self.API_KEY_FILTER not in config.id
            }
            self._langfuse.create_prompt(
                name=base_prompt_name,
                prompt=self._managed_prompts[base_prompt_name],
                config=llm_configurable_configs,
                labels=["production"],
                type="chat",
            )
            langfuse_prompt = self._langfuse.get_prompt(base_prompt_name)
        except Exception as error:
            # Lazy %-formatting so the error text actually reaches the log.
            logger.error(
                "Error occured while getting prompt template from langfuse. Error:\n%s",
                error,
            )
            return None
        return langfuse_prompt

    def get_base_llm(self, name: str) -> LLM:
        """
        Get the LLM configured with the settings stored on the Langfuse prompt.

        Parameters
        ----------
        name : str
            The name of the Langfuse prompt to retrieve the configuration for.

        Returns
        -------
        LLM
            The LLM with the prompt's ``config`` applied as "configurable".
            If the Langfuse prompt cannot be retrieved, the unmodified LLM.
        """
        langfuse_prompt = self.get_langfuse_prompt(name)
        if not langfuse_prompt:
            logger.error("Using fallback for llm")
            return self._llm

        return self._llm.with_config({"configurable": langfuse_prompt.config})

    def get_base_prompt(self, name: str) -> ChatPromptTemplate:
        """
        Retrieve the base prompt from Langfuse Prompt Management.

        Parameters
        ----------
        name : str
            The name of the prompt to retrieve.

        Returns
        -------
        ChatPromptTemplate
            The base prompt template.

        Raises
        ------
        ValueError
            If Langfuse is unavailable and the managed fallback prompt has an
            unexpected structure.

        Notes
        -----
        If the prompt cannot be retrieved from Langfuse, a fallback value is used.
        """
        langfuse_prompt = self.get_langfuse_prompt(name)
        if not langfuse_prompt:
            logger.error("Could not retrieve prompt template from langfuse. Using fallback value.")
            fallback = self._managed_prompts[name]
            if isinstance(fallback, ChatPromptTemplate):
                return fallback
            if isinstance(fallback, list) and len(fallback) > 0 and isinstance(fallback[0], dict) and "content" in fallback[0]:
                # First message becomes the system prompt; the user message is
                # an image payload filled in from the "image_data" variable.
                image_payload = [{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,{image_data}"}}]
                return ChatPromptTemplate.from_messages([("system", fallback[0]["content"]), ("user", image_payload)])
            logger.error("Unexpected structure for fallback prompt.")
            raise ValueError("Unexpected structure for fallback prompt.")
        langchain_prompt = langfuse_prompt.get_langchain_prompt()

        # The last message's text is parsed as JSON and used as the user
        # message content.
        langchain_prompt[-1] = ("user", json.loads(langchain_prompt[-1][1]))

        return ChatPromptTemplate.from_messages(langchain_prompt)
File without changes
@@ -0,0 +1,66 @@
1
+ """Module for creating LLM instances from settings and the LLM class."""
2
+
3
+ from typing import Callable, Type
4
+
5
+ from langchain_community.llms.ollama import Ollama
6
+ from langchain_core.language_models.llms import LLM
7
+ from langchain_core.runnables import ConfigurableField
8
+ from pydantic_settings import BaseSettings
9
+
10
+
11
def _generic_llm_factory(
    llm_class: Type[LLM],
    configurable_fields: dict[str, ConfigurableField],
) -> Callable[[BaseSettings], LLM]:
    """Return a factory that builds ``llm_class`` from a settings object.

    The returned callable passes every dumped settings field as a keyword
    argument to ``llm_class`` and then marks ``configurable_fields`` as
    configurable on the new instance.
    """

    def factory(settings: BaseSettings) -> LLM:
        constructor_kwargs = settings.model_dump()
        return llm_class(**constructor_kwargs).configurable_fields(**configurable_fields)

    return factory
20
+
21
+
22
def get_configurable_fields_from(settings: BaseSettings) -> dict[str, ConfigurableField]:
    """
    Extract configurable fields from the given settings.

    Parameters
    ----------
    settings : BaseSettings
        An instance of BaseSettings containing model fields with their respective settings.

    Returns
    -------
    dict[str, ConfigurableField]
        A dictionary where the keys are field names and the values are ConfigurableField instances
        with the field's id set to the field name and its name set to the field's title.

    Notes
    -----
    Only fields with a non-None title in their settings are included in the returned dictionary.
    """
    # Field metadata lives on the model class; reading ``model_fields`` from
    # an instance is deprecated in recent Pydantic 2 releases.
    model_fields = type(settings).model_fields
    return {
        field_name: ConfigurableField(id=field_name, name=field_info.title)
        for field_name, field_info in model_fields.items()
        if field_info.title is not None
    }
47
+
48
+
49
def llm_provider(settings: BaseSettings, llm_cls: Type[LLM] = Ollama) -> LLM:
    """
    Build an LLM of type ``llm_cls`` from ``settings``.

    Parameters
    ----------
    settings : BaseSettings
        Configuration settings for the LLM. Its fields become constructor
        arguments, and fields that carry a title become configurable fields.
    llm_cls : Type[LLM], optional
        The class type of the LLM to instantiate (default Ollama).

    Returns
    -------
    LLM
        An instance of the specified language model provider.
    """
    configurable = get_configurable_fields_from(settings)
    build_llm = _generic_llm_factory(llm_cls, configurable)
    return build_llm(settings)
@@ -0,0 +1,11 @@
1
+ """Module containing the Large Language Model (LLM) type enum class."""
2
+
3
+ from enum import StrEnum, unique
4
+
5
+
6
+ @unique
7
+ class LLMType(StrEnum):
8
+ """Enum class representing different types of Large Language Models (LLMs)."""
9
+
10
+ OLLAMA = "ollama"
11
+ OPENAI = "openai"
File without changes
@@ -0,0 +1,29 @@
1
+ """Contains settings regarding Langfuse."""
2
+
3
+ from pydantic import Field
4
+ from pydantic_settings import BaseSettings
5
+
6
+
7
class LangfuseSettings(BaseSettings):
    """
    Contains settings regarding Langfuse.

    Values are read from environment variables prefixed with ``LANGFUSE_``
    (case-insensitive), e.g. ``LANGFUSE_SECRET_KEY``.

    Attributes
    ----------
    secret_key : str
        The secret key for Langfuse.
    public_key : str
        The public key for Langfuse.
    host : str
        The host for Langfuse.
    """

    # Pydantic v2 style configuration; replaces the deprecated inner
    # ``class Config`` with identical options.
    model_config = {"env_prefix": "LANGFUSE_", "case_sensitive": False}

    secret_key: str = Field(default="", description="The secret key for Langfuse.")
    public_key: str = Field(default="", description="The public key for Langfuse.")
    # NOTE(review): this default looks like a LangChain URL, not a Langfuse
    # host - confirm the intended value; kept unchanged for compatibility.
    host: str = Field(default="https://api.langchain.com", description="The host for Langfuse.")
@@ -0,0 +1,25 @@
1
+ """Module containing the LanguageSettings class."""
2
+
3
+ from pydantic import Field
4
+ from pydantic_settings import BaseSettings
5
+
6
+
7
class LanguageSettings(BaseSettings):
    """
    Contains settings regarding the language used for OCR.

    Values are read from environment variables prefixed with ``OCR_``
    (case-insensitive), e.g. ``OCR_LANGUAGE``.

    Attributes
    ----------
    language : str
        The language to use for OCR.
    """

    # Pydantic v2 style configuration; replaces the deprecated inner
    # ``class Config`` with identical options.
    model_config = {"env_prefix": "OCR_", "case_sensitive": False}

    language: str = Field(
        default="en", description="The language in iso 639-1 format, e.g. 'en' for English, 'de' for German, etc."
    )
@@ -0,0 +1,27 @@
1
+ """Module for the LLM class type settings."""
2
+
3
+ from pydantic import Field
4
+ from pydantic_settings import BaseSettings
5
+
6
+ from langchain_ocr_lib.impl.llms.llm_type import LLMType
7
+
8
+
9
class LlmClassTypeSettings(BaseSettings):
    """Settings for the LLM class type.

    Values are read from environment variables prefixed with
    ``RAG_CLASS_TYPE_`` (case-insensitive), e.g. ``RAG_CLASS_TYPE_LLM_TYPE``.

    Attributes
    ----------
    llm_type : LLMType
        The type of LLM to use. Defaults to LLMType.OLLAMA.

    """

    # Pydantic v2 style configuration; replaces the deprecated inner
    # ``class Config`` with identical options.
    model_config = {"env_prefix": "RAG_CLASS_TYPE_", "case_sensitive": False}

    llm_type: LLMType = Field(
        default=LLMType.OLLAMA,
    )
@@ -0,0 +1,42 @@
1
+ """Module that contains settings regarding the LLM."""
2
+
3
+ from pydantic import Field
4
+ from pydantic_settings import BaseSettings
5
+
6
+
7
class OllamaSettings(BaseSettings):
    """
    Contains settings regarding the Ollama LLM.

    Values are read from environment variables prefixed with ``OLLAMA_``
    (case-insensitive), e.g. ``OLLAMA_MODEL``.

    Attributes
    ----------
    model : str
        The model name to be used.
    base_url : str
        The base URL for the LLM.
    top_k : int
        Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers,
        while a lower value (e.g. 10) will be more conservative.
    top_p : float
        Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text,
        while a lower value (e.g., 0.5) will generate more focused and conservative text.
    temperature : float
        The temperature of the model. Increasing the temperature will make the model answer more creatively.

    Notes
    -----
    Fields that declare a ``title`` are turned into configurable fields of the LLM.
    The field names should match the attribute names of the corresponding LLM class.
    """

    # Pydantic v2 style configuration; replaces the deprecated inner
    # ``class Config`` with identical options.
    model_config = {"env_prefix": "OLLAMA_", "case_sensitive": False}

    model: str = Field(default="gemma3:4b-it-q4_K_M")
    base_url: str = Field(default="http://localhost:11434")
    top_k: int = Field(default=0, title="LLM Top K")
    top_p: float = Field(default=0, title="LLM Top P")
    temperature: float = Field(default=0, title="LLM Temperature")
@@ -0,0 +1,35 @@
1
+ """Module contains settings regarding the OpenAI API."""
2
+
3
+ from pydantic import Field
4
+ from pydantic_settings import BaseSettings
5
+
6
+
7
class OpenAISettings(BaseSettings):
    """
    Contains settings regarding the OpenAI API.

    Values are read from environment variables prefixed with ``OPENAI_``
    (case-insensitive), e.g. ``OPENAI_API_KEY``.

    Attributes
    ----------
    model : str
        The model identifier.
    api_key : str
        The API key for authentication.
    top_p : float
        Total probability mass of tokens to consider at each step.
    temperature : float
        What sampling temperature to use.
    vision_capable : bool
        Flag to enable a vision capable model.
    """

    # Pydantic v2 style configuration; replaces the deprecated inner
    # ``class Config`` with identical options.
    model_config = {"env_prefix": "OPENAI_", "case_sensitive": False}

    model: str = Field(default="gpt-4o-mini-search-preview-2025-03-11", description="The model identifier")
    api_key: str = Field(default="", description="The API key for authentication")
    top_p: float = Field(default=1.0, description="Total probability mass of tokens to consider at each step")
    temperature: float = Field(default=0.7, description="What sampling temperature to use")
    vision_capable: bool = Field(default=False, description="Enable a vision capable model")
File without changes
@@ -0,0 +1,44 @@
1
+ """Module for the LangfuseTraceChain class."""
2
+
3
+ from typing import Optional
4
+
5
+ import inject
6
+ from langchain_core.runnables import RunnableConfig
7
+ from langfuse.callback import CallbackHandler
8
+
9
+ from langchain_ocr_lib.impl.settings.langfuse_settings import LangfuseSettings
10
+ from langchain_ocr_lib.tracers.traced_chain import TracedChain
11
+
12
+
13
class LangfuseTracedChain(TracedChain):
    """A class to trace the execution of a Runnable using Langfuse.

    This class wraps an inner Runnable and adds tracing capabilities using the Langfuse tracer.
    It allows for the configuration of the tracer through the provided settings.

    Attributes
    ----------
    CONFIG_CALLBACK_KEY : str
        The key used to store callbacks in the configuration.
    """

    CONFIG_CALLBACK_KEY = "callbacks"
    # The wrapped chain; resolved lazily by ``inject`` from the "OcrChain" binding.
    _inner_chain = inject.attr("OcrChain")

    def __init__(self, settings: LangfuseSettings):
        """Store the Langfuse credentials used to build tracing handlers.

        Parameters
        ----------
        settings : LangfuseSettings
            Supplies the public key, secret key and host for the handler.
        """
        super().__init__()
        self._settings = settings

    def _add_tracing_callback(self, session_id: str, config: Optional[RunnableConfig]) -> RunnableConfig:
        """Return a config that carries a Langfuse callback handler.

        Parameters
        ----------
        session_id : str
            Session identifier passed to the Langfuse handler.
        config : Optional[RunnableConfig]
            Existing configuration, if any. It is not modified.

        Returns
        -------
        RunnableConfig
            A new configuration whose callbacks are the existing ones (if
            any) followed by a fresh Langfuse handler.
        """
        handler = CallbackHandler(
            public_key=self._settings.public_key,
            secret_key=self._settings.secret_key,
            host=self._settings.host,
            session_id=session_id,
        )
        if not config:
            return RunnableConfig(callbacks=[handler])

        # Work on a shallow copy and a new callback list so a config object
        # reused across invocations does not accumulate handlers.
        traced_config = config.copy()
        existing_callbacks = config.get(self.CONFIG_CALLBACK_KEY) or []
        traced_config[self.CONFIG_CALLBACK_KEY] = existing_callbacks + [handler]
        return traced_config
@@ -0,0 +1,19 @@
1
+ """Module to map language codes to language names using pycountry."""
2
+
3
+ import pycountry
4
+
5
+
6
def get_language_name_from_pycountry(code: str) -> str | None:
    """Return the lowercase English language name for an ISO 639-1 code.

    Parameters
    ----------
    code : str
        Two-letter language abbreviation; it is lowercased before the lookup.

    Returns
    -------
    str | None
        The lowercased language name reported by pycountry, or None when the
        lookup yields no language.
    """
    language = pycountry.languages.get(alpha_2=code.lower())
    if language:
        # Sometimes language.name may include extra parts, adjust as needed.
        return language.name.lower()
    return None
13
+
14
+
15
# Example usage: print the resolved language name for a few sample codes.
if __name__ == "__main__":
    for lang_code in ("en", "de", "ru", "it", "es", "zh", "ja", "fr"):
        resolved_name = get_language_name_from_pycountry(lang_code)
        print(f"pycountry: {lang_code} -> {resolved_name}")
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env python
2
+ """Command-line interface for langchain-ocr-lib package."""
3
+
4
+ import argparse
5
+ import asyncio
6
+ import os
7
+ import sys
8
+ from typing import Optional
9
+
10
+ from langchain_ocr_lib.di_config import configure_di
11
+ import inject
12
+ from langchain_ocr_lib.impl.converter.image_converter import Image2MarkdownConverter
13
+ from langchain_ocr_lib.impl.converter.pdf_converter import Pdf2MarkdownConverter
14
+
15
+
16
def setup() -> None:
    """Prepare the application by running the dependency injection configuration."""
    # All wiring lives in di_config.configure_di; this wrapper only triggers it.
    configure_di()
19
+
20
+
21
async def convert_image_file(file_path: str, output_file: Optional[str] = None) -> str:
    """Convert an image file to markdown text.

    Parameters
    ----------
    file_path : str
        Path to the image file
    output_file : Optional[str]
        Path to save the markdown output, if None prints to stdout

    Returns
    -------
    str
        The markdown text

    Notes
    -----
    Exits the process with status 1 (after writing an error to stderr) when
    ``file_path`` does not point to an existing regular file.
    """
    # isfile instead of exists: a directory would pass an existence check and
    # only fail later inside the converter with a far less helpful error.
    if not os.path.isfile(file_path):
        print(f"Error: File {file_path} not found", file=sys.stderr)
        sys.exit(1)

    converter = inject.instance(Image2MarkdownConverter)

    # Pass the filename directly to the converter
    result = await converter.aconvert2markdown(file=None, filename=file_path)

    if output_file:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(result)
        print(f"Markdown saved to {output_file}")
    else:
        print(result)

    return result
53
+
54
+
55
async def convert_pdf_file(file_path: str, output_file: Optional[str] = None) -> str:
    """Convert a PDF file to markdown text.

    Parameters
    ----------
    file_path : str
        Path to the PDF file
    output_file : Optional[str]
        Path to save the markdown output, if None prints to stdout

    Returns
    -------
    str
        The markdown text

    Notes
    -----
    Exits the process with status 1 (after writing an error to stderr) when
    ``file_path`` does not point to an existing regular file.
    """
    # isfile instead of exists: a directory would pass an existence check and
    # only fail later inside the converter with a far less helpful error.
    if not os.path.isfile(file_path):
        print(f"Error: File {file_path} not found", file=sys.stderr)
        sys.exit(1)

    converter = inject.instance(Pdf2MarkdownConverter)

    # Pass the filename directly to the converter
    result = await converter.aconvert2markdown(file=None, filename=file_path)

    if output_file:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(result)
        print(f"Markdown saved to {output_file}")
    else:
        print(result)

    return result
87
+
88
+
89
def main():
    """Parse the command line and convert the given image or PDF to Markdown.

    The file type is taken from ``--type``; with ``auto`` it is derived from
    the file extension, and the process exits with status 1 when the
    extension is not recognised.
    """
    cli = argparse.ArgumentParser(description="Convert images or PDFs to Markdown")
    cli.add_argument("file", help="Path to the image or PDF file")
    cli.add_argument("-o", "--output", help="Output file path (default: print to stdout)", default=None)
    cli.add_argument(
        "-t", "--type", choices=["auto", "image", "pdf"], default="auto", help="File type (default: auto-detect)"
    )
    args = cli.parse_args()

    # Dependency injection must be configured before any converter is resolved.
    setup()

    file_type = args.type
    if file_type == "auto":
        lowered_name = args.file.lower()
        if lowered_name.endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp")):
            file_type = "image"
        elif lowered_name.endswith(".pdf"):
            file_type = "pdf"
        else:
            print(f"Error: Could not detect file type of {args.file}", file=sys.stderr)
            sys.exit(1)

    # Dispatch on the resolved type; anything else is silently ignored, as before.
    converters = {"image": convert_image_file, "pdf": convert_pdf_file}
    convert = converters.get(file_type)
    if convert is not None:
        asyncio.run(convert(args.file, args.output))
117
+
118
+
119
# Allow running the module directly in addition to the installed console script.
if __name__ == "__main__":
    main()
121
+
122
+ # langchain-ocr image.png -o output.md
File without changes
@@ -0,0 +1,60 @@
1
+ import json
2
+
3
+ from langchain.prompts import ChatPromptTemplate
4
+
5
+ from langchain_ocr_lib.language_mapping.language_mapping import get_language_name_from_pycountry
6
+
7
+
8
def ocr_prompt_template_builder(language: str = "en", model_name: str = "") -> list[dict[str, str]]:
    """Build the chat messages that instruct a model to OCR an image into Markdown.

    Parameters
    ----------
    language : str
        ISO 639-1 code of the language the text should be extracted in (default "en").
    model_name : str
        Name of the target model; names containing "llama3.2" get the system
        prompt wrapped in that model's header tokens.

    Returns
    -------
    list[dict[str, str]]
        A system message followed by a user message. The user content is a JSON
        string holding an image URL with an ``{image_data}`` placeholder.
    """
    # Fall back to the raw code so an unknown language never renders as "None" in the prompt.
    language_name = get_language_name_from_pycountry(language) or language

    system_prompt = f"""
You are an advanced OCR tool. Your task is to extract all text content from this image in {language_name} **verbatim**, without any modifications, interpretations, summarizations, or omissions by keeping the original format in Markdown. **It is imperative that you do not add, infer, or hallucinate any content that is not explicitly present in the image.**

**Requirements:** Adhere to the following guidelines:

- **Headers:** Use Markdown headers (`#`, `##`, `###`, etc.) **only if corresponding heading structures are explicitly present in the image**. Match the level of the header accurately.
- **Lists:** Preserve all original list formats (unordered lists using `-` or `*`, and ordered lists with numbers) **exactly as they appear** in the image. Maintain the original indentation.
- **Text Formatting:** Retain all visual text formatting (bold, italics, underlines, strikethrough, etc.) using the appropriate Markdown syntax (`**bold**`, `*italic*`, `<u>underline</u>`, `~~strikethrough~~`). If a direct Markdown equivalent doesn't exist, prioritize accuracy of the text content.
- **Code Blocks:** If code or preformatted text is detected (often with a distinct font or background), format it using Markdown code blocks (using triple backticks ```).
- **Tables:** If tabular data is present, attempt to format it as a Markdown table using pipes `|` and hyphens `-`. If the table structure is complex, prioritize accurate text extraction over perfect table formatting.
- **Spacing and Line Breaks:** Maintain original line breaks and spacing to preserve the layout as accurately as possible.

**Additional Verification:**
- After extraction, verify that every Markdown element (headers, lists, code blocks, tables, etc.) exactly reflects the appearance and structure in the image.
- Ensure that no part of the content (including headers, footers, and any subtext) is omitted or altered.
- If any element is ambiguous, replicate the original formatting as closely as possible.

**Text Extraction:**
- Extract all text content from the image, including headings, paragraphs, lists, tables, and any other textual elements.
- Do **not omit** any part of the page.
- Accurately replicate all visual formatting such as bold, italics, underlines, and other styles.

**Example:**
If the image contains the following text layout:
------------------------------------------------
# Chapter 1: Introduction

Welcome to the document.

**Key Points:**
- Item 1
- Item 2

```python
print("Hello, world!")
```
------------------------------------------------
Then your output should be exactly as above, preserving the Markdown syntax for headers, bold text, lists, and code blocks.

"""

    if "llama3.2" in model_name:
        # Wrap the system prompt in the header/end-of-turn tokens used for llama3.2 models.
        system_prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>" + system_prompt + "<|eot_id|>"

    ocr_prompt_template = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            # Plain (non-f) string: "{image_data}" stays a literal placeholder here.
            "content": json.dumps([{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,{image_data}"}}]),
        },
    ]
    return ocr_prompt_template
File without changes
@@ -0,0 +1,88 @@
1
+ """Module for the TracedChain class."""
2
+
3
+ import uuid
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Optional
6
+
7
+ from langchain_core.runnables import RunnableConfig, ensure_config
8
+
9
+ from langchain_ocr_lib.chains.chain import Chain
10
+
11
+ RunnableInput = Any
12
+ RunnableOutput = Any
13
+
14
+
15
class TracedChain(Chain[RunnableInput, RunnableOutput], ABC):
    """Base class for chains that add a tracing callback before delegating to an inner chain.

    Both ``invoke`` and ``ainvoke`` normalise the configuration, determine a
    session id, let the subclass attach its tracing callback and then call the
    inner chain. This class only reads ``self._inner_chain``; providing it is
    left to the subclass.

    Attributes
    ----------
    SESSION_ID_KEY : str
        The key used to store the session ID in the metadata.
    METADATA_KEY : str
        The key used to store metadata.
    """

    SESSION_ID_KEY = "session_id"
    METADATA_KEY = "metadata"

    async def ainvoke(
        self, chain_input: RunnableInput, config: Optional[RunnableConfig] = None, **kwargs: Any
    ) -> RunnableOutput:
        """
        Asynchronously invoke the inner chain with tracing enabled.

        Parameters
        ----------
        chain_input : RunnableInput
            The input to be processed by the chain.
        config : Optional[RunnableConfig], optional
            Configuration for the chain execution (default None).
        **kwargs : Any
            Additional keyword arguments. Accepted for interface compatibility;
            they are not forwarded to the inner chain.

        Returns
        -------
        RunnableOutput
            The output produced by the inner chain.
        """
        config = ensure_config(config)
        session_id = self._get_session_id(config)
        config_with_tracing = self._add_tracing_callback(session_id, config)
        return await self._inner_chain.ainvoke(chain_input, config=config_with_tracing)

    def invoke(
        self, chain_input: RunnableInput, config: Optional[RunnableConfig] = None, **kwargs: Any
    ) -> RunnableOutput:
        """
        Invoke the inner chain with tracing enabled.

        Parameters
        ----------
        chain_input : RunnableInput
            The input to be processed by the chain.
        config : Optional[RunnableConfig], optional
            Configuration for the chain execution (default None).
        **kwargs : Any
            Additional keyword arguments. Accepted for interface compatibility;
            they are not forwarded to the inner chain.

        Returns
        -------
        RunnableOutput
            The output produced by the inner chain.
        """
        config = ensure_config(config)
        session_id = self._get_session_id(config)
        config_with_tracing = self._add_tracing_callback(session_id, config)
        return self._inner_chain.invoke(chain_input, config=config_with_tracing)

    @abstractmethod
    def _add_tracing_callback(self, session_id: str, config: Optional[RunnableConfig]) -> RunnableConfig:
        """Return a configuration that carries the tracing callback for ``session_id``."""
        ...

    def _get_session_id(self, config: Optional[RunnableConfig]) -> str:
        """Return the session id stored in the config metadata, or a fresh UUID4 string.

        A missing config or missing/``None`` metadata is treated like empty
        metadata, matching the ``Optional`` annotation. A UUID is generated
        only when no session id is stored.
        """
        metadata = (config or {}).get(self.METADATA_KEY) or {}
        if self.SESSION_ID_KEY in metadata:
            return metadata[self.SESSION_ID_KEY]
        return str(uuid.uuid4())
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.3
2
+ Name: langchain-ocr-lib
3
+ Version: 0.1.0
4
+ Summary:
5
+ License: MIT
6
+ Author: Andreas Klos
7
+ Author-email: aklos@outlook.de
8
+ Requires-Python: >=3.11,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Requires-Dist: deprecated (>=1.2.14,<2.0.0)
15
+ Requires-Dist: inject (>=5.2.1,<6.0.0)
16
+ Requires-Dist: langchain-community (>=0.3.19,<0.4.0)
17
+ Requires-Dist: langchain-ollama (>=0.2.0,<0.3.0)
18
+ Requires-Dist: langchain-openai (>=0.3.8,<0.4.0)
19
+ Requires-Dist: langfuse (>=2.59.7,<3.0.0)
20
+ Requires-Dist: openai (>=1.42.0,<2.0.0)
21
+ Requires-Dist: pdf2image (>=1.17.0,<2.0.0)
22
+ Requires-Dist: pillow (>=11.0.0,<12.0.0)
23
+ Requires-Dist: pycountry (>=24.6.1,<25.0.0)
24
+ Requires-Dist: pytest-asyncio (>=0.25.0,<0.26.0)
25
+ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
26
+ Description-Content-Type: text/markdown
27
+
28
+
@@ -0,0 +1,35 @@
1
+ langchain_ocr_lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ langchain_ocr_lib/chains/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ langchain_ocr_lib/chains/chain.py,sha256=D00wnm987YgkJsIAIwQVehX_B4kBOzrjistaPf1M0GE,1946
4
+ langchain_ocr_lib/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ langchain_ocr_lib/converter/converter.py,sha256=aJuaLX2942d8WRPNaU0cUPO1_266QE7Y6SVKpnxpGBA,1196
6
+ langchain_ocr_lib/di_config.py,sha256=H1CxtSlzUH3QGkRFBQqgMGJZx5HGWQ0yrB2kEvFIbOk,3083
7
+ langchain_ocr_lib/impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ langchain_ocr_lib/impl/chains/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ langchain_ocr_lib/impl/chains/ocr_chain.py,sha256=eGiflXVbo1UP56rRHPY6fB4woJtyIvv4SmXNC1RHWFY,2594
10
+ langchain_ocr_lib/impl/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ langchain_ocr_lib/impl/converter/image_converter.py,sha256=G1rDOCbudWNL4sDvSGJ7CeeFrWUblfWPGaZf5JsnpiM,2871
12
+ langchain_ocr_lib/impl/converter/pdf_converter.py,sha256=ssj8DL_9wf6kMhjUhDkw0gwSwNLrvgh8nBRspwj60Vk,3510
13
+ langchain_ocr_lib/impl/langfuse_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py,sha256=G_qGE_-LnPpNJYgkoDoVqoXYkwsaMkB_HN2uSng3YVA,5245
15
+ langchain_ocr_lib/impl/llms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ langchain_ocr_lib/impl/llms/llm_factory.py,sha256=9DsUdoYNrjeWLGA9ISDdHN2cxcQ7DquNQ5it6zSxHlg,2199
17
+ langchain_ocr_lib/impl/llms/llm_type.py,sha256=_Ap7yStlBn0tyOyfVLH1c2j2A9-ccsTCxAm7bgoRQnM,268
18
+ langchain_ocr_lib/impl/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ langchain_ocr_lib/impl/settings/langfuse_settings.py,sha256=5lr3tVeiHXDUaYtWAnZPXrKxBJgM2wgaz7yyZThhCsE,812
20
+ langchain_ocr_lib/impl/settings/language_settings.py,sha256=tdAC1t5wGu1MoH1jhjkDnxnX4Ui7giwxt7Qm8_LPkP8,627
21
+ langchain_ocr_lib/impl/settings/llm_class_type_settings.py,sha256=4KC6zxby13wn38rB8055J8LNVTsmUfrOiyLtLuToHaM,598
22
+ langchain_ocr_lib/impl/settings/ollama_chat_settings.py,sha256=8RWMsaK4qDrqC6Mrxekr8IEDYwcvjYwhw9xDwZemxI4,1506
23
+ langchain_ocr_lib/impl/settings/openai_chat_settings.py,sha256=cXzxe-sea8VCK2M_u9ZIL4l8AR_YdhmA-phZa9fwf8o,1233
24
+ langchain_ocr_lib/impl/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
+ langchain_ocr_lib/impl/tracers/langfuse_traced_chain.py,sha256=lfYLEf9mJ2ie5wofHFG_FUicRi1281XGBC0GKXcAkHM,1546
26
+ langchain_ocr_lib/language_mapping/language_mapping.py,sha256=VY7WkkZauoHNxkvgUYbig0rDmlKqDkz24cXMd6A7txM,700
27
+ langchain_ocr_lib/main.py,sha256=_kx6pIsIV9pii2_TSYisFT4tKDQHMHef6buWhWoj11E,3485
28
+ langchain_ocr_lib/prompt_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
+ langchain_ocr_lib/prompt_templates/ocr_prompt.py,sha256=3Be1AL-HJkxPnAP0DNH1MqvAxFWTCeM5UOKP63xkHsY,3543
30
+ langchain_ocr_lib/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ langchain_ocr_lib/tracers/traced_chain.py,sha256=uxRkdLNn_G6dAsti_gUuF7muhIj10xrOUL7HUga40oc,3056
32
+ langchain_ocr_lib-0.1.0.dist-info/METADATA,sha256=2H3iEatfiflH4GcrFhIw2Cg8wjsgKsLoeP2irFsVTio,991
33
+ langchain_ocr_lib-0.1.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
34
+ langchain_ocr_lib-0.1.0.dist-info/entry_points.txt,sha256=l4mIs0tnIgbJYuVveZySQKVBnqNMHS-8ZZtLwz8ag5k,61
35
+ langchain_ocr_lib-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.1.2
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ langchain-ocr=langchain_ocr_lib.main:main
3
+