langchain-ocr-lib 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_ocr_lib/__init__.py +0 -0
- langchain_ocr_lib/chains/__init__.py +0 -0
- langchain_ocr_lib/chains/chain.py +55 -0
- langchain_ocr_lib/converter/__init__.py +0 -0
- langchain_ocr_lib/converter/converter.py +52 -0
- langchain_ocr_lib/di_config.py +86 -0
- langchain_ocr_lib/impl/__init__.py +0 -0
- langchain_ocr_lib/impl/chains/__init__.py +0 -0
- langchain_ocr_lib/impl/chains/ocr_chain.py +86 -0
- langchain_ocr_lib/impl/converter/__init__.py +0 -0
- langchain_ocr_lib/impl/converter/image_converter.py +88 -0
- langchain_ocr_lib/impl/converter/pdf_converter.py +105 -0
- langchain_ocr_lib/impl/langfuse_manager/__init__.py +0 -0
- langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py +149 -0
- langchain_ocr_lib/impl/llms/__init__.py +0 -0
- langchain_ocr_lib/impl/llms/llm_factory.py +66 -0
- langchain_ocr_lib/impl/llms/llm_type.py +11 -0
- langchain_ocr_lib/impl/settings/__init__.py +0 -0
- langchain_ocr_lib/impl/settings/langfuse_settings.py +29 -0
- langchain_ocr_lib/impl/settings/language_settings.py +25 -0
- langchain_ocr_lib/impl/settings/llm_class_type_settings.py +27 -0
- langchain_ocr_lib/impl/settings/ollama_chat_settings.py +42 -0
- langchain_ocr_lib/impl/settings/openai_chat_settings.py +35 -0
- langchain_ocr_lib/impl/tracers/__init__.py +0 -0
- langchain_ocr_lib/impl/tracers/langfuse_traced_chain.py +44 -0
- langchain_ocr_lib/language_mapping/language_mapping.py +19 -0
- langchain_ocr_lib/main.py +122 -0
- langchain_ocr_lib/prompt_templates/__init__.py +0 -0
- langchain_ocr_lib/prompt_templates/ocr_prompt.py +60 -0
- langchain_ocr_lib/tracers/__init__.py +0 -0
- langchain_ocr_lib/tracers/traced_chain.py +88 -0
- langchain_ocr_lib-0.1.0.dist-info/METADATA +28 -0
- langchain_ocr_lib-0.1.0.dist-info/RECORD +35 -0
- langchain_ocr_lib-0.1.0.dist-info/WHEEL +4 -0
- langchain_ocr_lib-0.1.0.dist-info/entry_points.txt +3 -0
langchain_ocr_lib/__init__.py
File without changes
langchain_ocr_lib/chains/__init__.py
File without changes
langchain_ocr_lib/chains/chain.py
@@ -0,0 +1,55 @@
+"""Module for the base class of chains."""
+
+from abc import ABC, abstractmethod
+from typing import Any, Optional
+
+from langchain_core.runnables import Runnable, RunnableConfig
+from langchain_core.runnables.utils import Input, Output
+
+
+class Chain(Runnable[Input, Output], ABC):
+    """Base class for chains."""
+
+    @abstractmethod
+    async def ainvoke(self, chain_input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any) -> Output:
+        """Asynchronously invoke the chain with the given input and configuration.
+
+        Parameters
+        ----------
+        chain_input : Input
+            The input data required to asynchronously invoke the chain.
+        config : Optional[RunnableConfig], optional
+            The configuration settings for the chain invocation, by default None.
+        **kwargs : Any
+            Additional keyword arguments that may be required for the chain invocation.
+
+        Returns
+        -------
+        Output
+            The result of the chain invocation.
+        """
+
+    @abstractmethod
+    def invoke(self, chain_input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any) -> Output:
+        """
+        Invoke the chain with the given input and configuration.
+
+        The signature promises an `Output`, but the base class provides no
+        implementation, so calling it here raises `NotImplementedError`.
+
+        Notes
+        -----
+        This method should never be called on the base class directly. It exists only because the base class requires an implementation.
+
+        Parameters
+        ----------
+        chain_input : Input
+            The input data required to invoke the chain.
+        config : Optional[RunnableConfig], optional
+            The configuration settings for the chain invocation, by default None.
+
+        Returns
+        -------
+        Output
+            The result of the chain invocation.
+        """
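To make the contract concrete, here is a minimal sketch of a subclass; `EchoChain` and its behavior are illustrative only, not part of the package:

```python
# Illustrative sketch, not part of the package: a minimal concrete Chain.
from typing import Any, Optional

from langchain_core.runnables import RunnableConfig

from langchain_ocr_lib.chains.chain import Chain


class EchoChain(Chain[str, str]):
    """Toy chain that returns its input unchanged."""

    async def ainvoke(self, chain_input: str, config: Optional[RunnableConfig] = None, **kwargs: Any) -> str:
        return chain_input

    def invoke(self, chain_input: str, config: Optional[RunnableConfig] = None, **kwargs: Any) -> str:
        return chain_input
```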
langchain_ocr_lib/converter/__init__.py
File without changes
langchain_ocr_lib/converter/converter.py
@@ -0,0 +1,52 @@
+"""Module for the File2MarkdownConverter class."""
+
+from abc import ABC, abstractmethod
+import inject
+
+
+class File2MarkdownConverter(ABC):
+    """Abstract base class for file-to-markdown converters."""
+
+    _chain = inject.attr("LangfuseTracedChain")
+
+    @abstractmethod
+    async def aconvert2markdown(self, file: bytes) -> str:
+        """Asynchronously convert file to markdown format.
+
+        Parameters
+        ----------
+        file : bytes
+            The file to convert.
+
+        Returns
+        -------
+        str
+            The markdown representation of the file.
+
+        Raises
+        ------
+        NotImplementedError
+            If the method is not implemented.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def convert2markdown(self, file: bytes) -> str:
+        """Convert file to markdown format.
+
+        Parameters
+        ----------
+        file : bytes
+            The file to convert.
+
+        Returns
+        -------
+        str
+            The markdown representation of the file.
+
+        Raises
+        ------
+        NotImplementedError
+            If the method is not implemented.
+        """
+        raise NotImplementedError
langchain_ocr_lib/di_config.py
@@ -0,0 +1,86 @@
+"""Module containing the dependency injection container for managing application dependencies."""
+
+from inject import Binder
+import inject
+from langchain_ollama import ChatOllama
+from langchain_openai import ChatOpenAI
+from langfuse import Langfuse
+
+from langchain_ocr_lib.impl.chains.ocr_chain import OcrChain
+from langchain_ocr_lib.impl.settings.ollama_chat_settings import OllamaSettings
+from langchain_ocr_lib.impl.settings.openai_chat_settings import OpenAISettings
+from langchain_ocr_lib.impl.settings.llm_class_type_settings import LlmClassTypeSettings
+from langchain_ocr_lib.impl.settings.langfuse_settings import LangfuseSettings
+from langchain_ocr_lib.impl.settings.language_settings import LanguageSettings
+from langchain_ocr_lib.impl.tracers.langfuse_traced_chain import LangfuseTracedChain
+from langchain_ocr_lib.prompt_templates.ocr_prompt import ocr_prompt_template_builder
+from langchain_ocr_lib.impl.llms.llm_factory import llm_provider
+from langchain_ocr_lib.impl.langfuse_manager.langfuse_manager import LangfuseManager
+from langchain_ocr_lib.impl.converter.pdf_converter import Pdf2MarkdownConverter
+from langchain_ocr_lib.impl.converter.image_converter import Image2MarkdownConverter
+
+
+def lib_di_config(binder: Binder):
+    """Configure dependency injection bindings for the OCR library.
+
+    Parameters
+    ----------
+    binder : Binder
+        The dependency injection binder instance used to register the bindings.
+
+    Raises
+    ------
+    NotImplementedError
+        If the configured LLM type is not implemented.
+
+    """
+    langfuse_settings = LangfuseSettings()
+    llm_class_type_settings = LlmClassTypeSettings()
+    language_settings = LanguageSettings()
+
+    if llm_class_type_settings.llm_type == "ollama":
+        settings = OllamaSettings()
+        llm_instance = llm_provider(settings, ChatOllama)
+    elif llm_class_type_settings.llm_type == "openai":
+        settings = OpenAISettings()
+        llm_instance = llm_provider(settings, ChatOpenAI)
+    else:
+        raise NotImplementedError("Configured LLM is not implemented")
+    binder.bind("LargeLanguageModel", llm_instance)
+
+    prompt = ocr_prompt_template_builder(language=language_settings.language, model_name=settings.model)
+
+    binder.bind(
+        "LangfuseClient",
+        Langfuse(
+            public_key=langfuse_settings.public_key,
+            secret_key=langfuse_settings.secret_key,
+            host=langfuse_settings.host,
+        ),
+    )
+
+    binder.bind(
+        "LangfuseManager",
+        LangfuseManager(
+            managed_prompts={
+                OcrChain.__name__: prompt,
+            },
+        ),
+    )
+
+    binder.bind("OcrChain", OcrChain())
+
+    binder.bind(
+        "LangfuseTracedChain",
+        LangfuseTracedChain(
+            settings=langfuse_settings,
+        ),
+    )
+
+    binder.bind("PdfConverter", Pdf2MarkdownConverter())
+    binder.bind("ImageConverter", Image2MarkdownConverter())
+
+
+def configure_di():
+    """Configure dependency injection using the `inject` library."""
+    inject.configure(lib_di_config, allow_override=True, clear=True)
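In practice a consumer calls `configure_di()` once at startup and then resolves the string-keyed bindings through `inject`; a minimal sketch, assuming the default Ollama settings point at a reachable server:

```python
import inject

from langchain_ocr_lib.di_config import configure_di

configure_di()  # installs lib_di_config with allow_override=True, clear=True

# Bindings are registered under the string keys used in lib_di_config:
llm = inject.instance("LargeLanguageModel")
pdf_converter = inject.instance("PdfConverter")
image_converter = inject.instance("ImageConverter")
```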
langchain_ocr_lib/impl/__init__.py
File without changes
langchain_ocr_lib/impl/chains/__init__.py
File without changes
langchain_ocr_lib/impl/chains/ocr_chain.py
@@ -0,0 +1,86 @@
+"""Module for the LLM-based OCR chain."""
+
+from typing import Any, Optional
+
+from langchain_core.runnables import Runnable, RunnableConfig
+from langchain_core.runnables.utils import Input
+import inject
+
+from langchain_ocr_lib.chains.chain import Chain
+
+RunnableInput = Input  # TODO: adjust properly
+RunnableOutput = str
+
+
+class OcrChain(Chain[RunnableInput, RunnableOutput]):
+    """Chain that runs the OCR prompt against the configured LLM."""
+
+    _langfuse_manager = inject.attr("LangfuseManager")
+
+    def __init__(self):
+        """Initialize the OcrChain.
+
+        Attributes
+        ----------
+        _langfuse_manager : LangfuseManager
+            Manager for Langfuse prompts and LLM access, injected at runtime.
+        """
+
+    async def ainvoke(
+        self, chain_input: RunnableInput, config: Optional[RunnableConfig] = None, **kwargs: Any
+    ) -> RunnableOutput:
+        """
+        Asynchronously invoke the chain with the given input.
+
+        Parameters
+        ----------
+        chain_input : RunnableInput
+            The input to be processed by the chain.
+        config : Optional[RunnableConfig]
+            Configuration for the chain execution (default None).
+        **kwargs : Any
+            Additional keyword arguments passed to the chain.
+
+        Returns
+        -------
+        RunnableOutput
+            The output generated by the chain.
+
+        Raises
+        ------
+        ChainError
+            If an error occurs during chain execution.
+        """
+        return await self._create_chain().ainvoke(chain_input, config=config)
+
+    def invoke(
+        self, chain_input: RunnableInput, config: Optional[RunnableConfig] = None, **kwargs: Any
+    ) -> RunnableOutput:
+        """
+        Invoke the chain with the given input.
+
+        Parameters
+        ----------
+        chain_input : RunnableInput
+            The input to be processed by the chain.
+        config : Optional[RunnableConfig]
+            Configuration for the chain execution (default None).
+        **kwargs : Any
+            Additional keyword arguments passed to the chain.
+
+        Returns
+        -------
+        RunnableOutput
+            The output generated by the chain.
+
+        Raises
+        ------
+        ChainError
+            If an error occurs during chain execution.
+        """
+        return self._create_chain().invoke(chain_input, config=config)
+
+    def _create_chain(self) -> Runnable:
+        return self._langfuse_manager.get_base_prompt(self.__class__.__name__) | self._langfuse_manager.get_base_llm(
+            self.__class__.__name__
+        )
langchain_ocr_lib/impl/converter/__init__.py
File without changes
langchain_ocr_lib/impl/converter/image_converter.py
@@ -0,0 +1,88 @@
+"""Module for converting an image to markdown using a Langchain chain."""
+
+import io
+import base64
+from PIL import Image
+from PIL.ImageFile import ImageFile
+
+from langchain_ocr_lib.converter.converter import File2MarkdownConverter
+
+
+class Image2MarkdownConverter(File2MarkdownConverter):
+    """Converts an image to markdown using a Langchain chain."""
+
+    async def aconvert2markdown(self, file: ImageFile | None = None, filename: str | None = None) -> str:
+        """
+        Asynchronously convert an image to markdown using a Langchain chain.
+
+        Parameters
+        ----------
+        file : ImageFile | None, optional
+            PIL Image object to convert, by default None
+        filename : str | None, optional
+            Path to the image file to convert, by default None
+
+        Returns
+        -------
+        str
+            Markdown representation of the image.
+
+        Raises
+        ------
+        ValueError
+            If no file or filename is provided.
+        ValueError
+            If the file is corrupted or the file type is unsupported.
+        """
+        if file is None and filename is None:
+            raise ValueError("No file provided")
+        if file is None:
+            try:
+                file = Image.open(filename)
+            except Exception as e:
+                raise ValueError("Image corrupted or unsupported file type") from e
+
+        buf = io.BytesIO()
+        file.save(buf, format="PNG")
+        base64_img = base64.b64encode(buf.getvalue()).decode("utf-8")
+        response = await self._chain.ainvoke({"image_data": base64_img})
+
+        return response.content
+
+    def convert2markdown(self, file: ImageFile | None = None, filename: str | None = None) -> str:
+        """
+        Convert an image to markdown using a Langchain chain.
+
+        Parameters
+        ----------
+        file : ImageFile | None, optional
+            PIL Image object to convert, by default None
+        filename : str | None, optional
+            Path to the image file to convert, by default None
+
+        Returns
+        -------
+        str
+            Markdown representation of the image.
+
+        Raises
+        ------
+        ValueError
+            If no file or filename is provided.
+        ValueError
+            If the file is corrupted or the file type is unsupported.
+        """
+        if file is None and filename is None:
+            raise ValueError("No file provided")
+        if file is None:
+            try:
+                file = Image.open(filename)
+            except Exception as e:
+                raise ValueError("Image corrupted or unsupported file type") from e
+
+        buf = io.BytesIO()
+        file.save(buf, format="PNG")
+        base64_img = base64.b64encode(buf.getvalue()).decode("utf-8")
+        response = self._chain.invoke({"image_data": base64_img})
+
+        return response.content
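A usage sketch for this converter; the path `invoice.png` is a placeholder, and the configured vision model must be reachable:

```python
from langchain_ocr_lib.di_config import configure_di
from langchain_ocr_lib.impl.converter.image_converter import Image2MarkdownConverter

configure_di()  # _chain is resolved lazily via inject.attr, so DI must be set up first
converter = Image2MarkdownConverter()

markdown = converter.convert2markdown(filename="invoice.png")  # placeholder path
print(markdown)
```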
langchain_ocr_lib/impl/converter/pdf_converter.py
@@ -0,0 +1,105 @@
+"""Module for converting PDF files to markdown."""
+
+from pdf2image import convert_from_bytes
+import io
+import base64
+
+from langchain_ocr_lib.converter.converter import File2MarkdownConverter
+
+
+class Pdf2MarkdownConverter(File2MarkdownConverter):
+    """Converts PDF files to markdown format.
+
+    This class provides methods to convert PDF files, either provided as bytes or by filename,
+    into markdown format.
+
+    Attributes
+    ----------
+    _chain : Chain
+        The OCR chain used to process images.
+    """
+
+    async def aconvert2markdown(self, file: bytes | None = None, filename: str | None = None) -> str:
+        """Asynchronously convert a PDF file (either provided as bytes or by filename) into markdown.
+
+        Parameters
+        ----------
+        file : bytes, optional
+            The PDF file as bytes. Defaults to None.
+        filename : str, optional
+            The path to the PDF file. Defaults to None.
+
+        Returns
+        -------
+        str
+            The markdown representation of the PDF content extracted via OCR.
+
+        Raises
+        ------
+        ValueError
+            If neither `file` nor `filename` is provided.
+        ValueError
+            If the PDF file is corrupted or the file type is unsupported.
+        """
+        if file is None and filename is None:
+            raise ValueError("No file provided")
+        if file is None:
+            try:
+                with open(filename, "rb") as f:
+                    file = f.read()
+            except Exception as e:
+                raise ValueError("PDF corrupted or unsupported file type") from e
+
+        images = convert_from_bytes(file)
+
+        markdown = ""
+        for image in images:
+            # Wrap the image in a Document if your chain expects it.
+            buf = io.BytesIO()
+            image.save(buf, format="PNG")
+            base64_img = base64.b64encode(buf.getvalue()).decode("utf-8")
+            response = await self._chain.ainvoke({"image_data": base64_img})
+            markdown += response.content
+        return markdown
+
+    def convert2markdown(self, file: bytes | None = None, filename: str | None = None) -> str:
+        """Convert a PDF file (either provided as bytes or by filename) into markdown.
+
+        Parameters
+        ----------
+        file : bytes, optional
+            The PDF file as bytes. Defaults to None.
+        filename : str, optional
+            The path to the PDF file. Defaults to None.
+
+        Returns
+        -------
+        str
+            The markdown representation of the PDF content extracted via OCR.
+
+        Raises
+        ------
+        ValueError
+            If neither `file` nor `filename` is provided.
+        ValueError
+            If the PDF file is corrupted or the file type is unsupported.
+        """
+        if file is None and filename is None:
+            raise ValueError("No file provided")
+        if file is None:
+            try:
+                with open(filename, "rb") as f:
+                    file = f.read()
+            except Exception as e:
+                raise ValueError("PDF corrupted or unsupported file type") from e
+
+        images = convert_from_bytes(file)
+
+        markdown = ""
+        for image in images:
+            buf = io.BytesIO()
+            image.save(buf, format="PNG")
+            base64_img = base64.b64encode(buf.getvalue()).decode("utf-8")
+            response = self._chain.invoke({"image_data": base64_img})
+            markdown += response.content
+        return markdown
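The PDF path additionally requires poppler on the system, which `pdf2image.convert_from_bytes` relies on. A sketch using the bytes variant (`report.pdf` is a placeholder):

```python
from langchain_ocr_lib.di_config import configure_di
from langchain_ocr_lib.impl.converter.pdf_converter import Pdf2MarkdownConverter

configure_di()
converter = Pdf2MarkdownConverter()

with open("report.pdf", "rb") as f:  # placeholder path
    markdown = converter.convert2markdown(file=f.read())  # one LLM call per page
```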
langchain_ocr_lib/impl/langfuse_manager/__init__.py
File without changes
langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py
@@ -0,0 +1,149 @@
+# spell-checker: disable
+"""Module for managing Langfuse prompts and Large Language Models (LLMs)."""
+import logging
+from typing import Optional
+import inject
+import json
+
+from langchain.prompts import ChatPromptTemplate
+from langchain_core.language_models.llms import LLM
+from langfuse.api.resources.commons.errors.not_found_error import NotFoundError
+from langfuse.model import ChatPromptClient
+
+logger = logging.getLogger(__name__)
+
+
+class LangfuseManager:
+    """Manage prompts using Langfuse and a Large Language Model (LLM).
+
+    Attributes
+    ----------
+    API_KEY_FILTER : str
+        A filter string used to exclude the API key from configurations.
+    """
+
+    API_KEY_FILTER: str = "api_key"
+    _llm = inject.attr("LargeLanguageModel")
+    _langfuse = inject.attr("LangfuseClient")
+
+    def __init__(
+        self,
+        managed_prompts: dict[str, str],
+    ):
+        self._managed_prompts = managed_prompts
+
+    def init_prompts(self) -> None:
+        """
+        Initialize the prompts managed by the LangfuseManager.
+
+        This method iterates over the keys of the managed prompts and retrieves
+        each prompt using the `get_langfuse_prompt` method.
+
+        Returns
+        -------
+        None
+        """
+        for key in list(self._managed_prompts.keys()):
+            self.get_langfuse_prompt(key)
+
+    def get_langfuse_prompt(self, base_prompt_name: str) -> Optional[ChatPromptClient]:
+        """
+        Retrieve the prompt from Langfuse Prompt Management.
+
+        Parameters
+        ----------
+        base_prompt_name : str
+            The name of the base prompt to retrieve.
+
+        Returns
+        -------
+        Optional[ChatPromptClient]
+            The Langfuse prompt template if found, otherwise None.
+
+        Raises
+        ------
+        NotFoundError
+            If the prompt is not found in Langfuse, a new prompt is created.
+        Exception
+            If an error occurs while retrieving the prompt template from Langfuse.
+        """
+        try:
+            langfuse_prompt = self._langfuse.get_prompt(base_prompt_name)
+        except NotFoundError:
+            logger.info("Prompt not found in Langfuse. Creating new.")
+            llm_configurable_configs = {
+                config.id: config.default for config in self._llm.config_specs if self.API_KEY_FILTER not in config.id
+            }
+            self._langfuse.create_prompt(
+                name=base_prompt_name,
+                prompt=self._managed_prompts[base_prompt_name],
+                config=llm_configurable_configs,
+                labels=["production"],
+                type="chat",
+            )
+            langfuse_prompt = self._langfuse.get_prompt(base_prompt_name)
+        except Exception as error:
+            logger.error(
+                "Error occurred while getting prompt template from Langfuse. Error:\n%s",
+                error,
+            )
+            return None
+        return langfuse_prompt
+
+    def get_base_llm(self, name: str) -> LLM:
+        """
+        Get the Langfuse prompt and configuration, as well as the Large Language Model (LLM).
+
+        Parameters
+        ----------
+        name : str
+            The name of the Langfuse prompt to retrieve the configuration for.
+
+        Returns
+        -------
+        LLM
+            The base Large Language Model. If the Langfuse prompt is not found,
+            returns the LLM with a fallback configuration.
+        """
+        langfuse_prompt = self.get_langfuse_prompt(name)
+        if not langfuse_prompt:
+            logger.error("Using fallback for llm")
+            return self._llm
+
+        return self._llm.with_config({"configurable": langfuse_prompt.config})
+
+    def get_base_prompt(self, name: str) -> ChatPromptTemplate:
+        """
+        Retrieve the base prompt from Langfuse Prompt Management.
+
+        Parameters
+        ----------
+        name : str
+            The name of the prompt to retrieve.
+
+        Returns
+        -------
+        ChatPromptTemplate
+            The base prompt template.
+
+        Notes
+        -----
+        If the prompt cannot be retrieved from Langfuse, a fallback value is used.
+        """
+        langfuse_prompt = self.get_langfuse_prompt(name)
+        if not langfuse_prompt:
+            logger.error("Could not retrieve prompt template from langfuse. Using fallback value.")
+            fallback = self._managed_prompts[name]
+            if isinstance(fallback, ChatPromptTemplate):
+                return fallback
+            if isinstance(fallback, list) and len(fallback) > 0 and isinstance(fallback[0], dict) and "content" in fallback[0]:
+                image_payload = [{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,{image_data}"}}]
+                return ChatPromptTemplate.from_messages([("system", fallback[0]["content"]), ("user", image_payload)])
+            else:
+                logger.error("Unexpected structure for fallback prompt.")
+                raise ValueError("Unexpected structure for fallback prompt.")
+        langchain_prompt = langfuse_prompt.get_langchain_prompt()
+
+        langchain_prompt[-1] = ("user", json.loads(langchain_prompt[-1][1]))
+
+        return ChatPromptTemplate.from_messages(langchain_prompt)
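The fallback branch of `get_base_prompt` is worth tracing by hand: when Langfuse is unreachable, the managed prompt list from `ocr_prompt_template_builder` is rebuilt into a `ChatPromptTemplate` whose user message is a templated image part. A sketch of that construction, with a simplified system text standing in for the real prompt:

```python
from langchain.prompts import ChatPromptTemplate

# Simplified stand-in for the managed prompt produced by ocr_prompt_template_builder.
fallback = [{"role": "system", "content": "You are an advanced OCR tool."}]

image_payload = [{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,{image_data}"}}]
prompt = ChatPromptTemplate.from_messages([("system", fallback[0]["content"]), ("user", image_payload)])

# {image_data} is substituted inside the nested image_url dict at format time.
messages = prompt.format_messages(image_data="<base64-encoded PNG>")
```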
langchain_ocr_lib/impl/llms/__init__.py
File without changes
langchain_ocr_lib/impl/llms/llm_factory.py
@@ -0,0 +1,66 @@
+"""Module for creating LLM instances from settings and the LLM class."""
+
+from typing import Callable, Type
+
+from langchain_community.llms.ollama import Ollama
+from langchain_core.language_models.llms import LLM
+from langchain_core.runnables import ConfigurableField
+from pydantic_settings import BaseSettings
+
+
+def _generic_llm_factory(
+    llm_class: Type[LLM],
+    configurable_fields: dict[str, ConfigurableField],
+) -> Callable[[BaseSettings], LLM]:
+    def factory(settings: BaseSettings) -> LLM:
+        llm_instance = llm_class(**settings.model_dump())
+        return llm_instance.configurable_fields(**configurable_fields)
+
+    return factory
+
+
+def get_configurable_fields_from(settings: BaseSettings) -> dict[str, ConfigurableField]:
+    """
+    Extract configurable fields from the given settings.
+
+    Parameters
+    ----------
+    settings : BaseSettings
+        An instance of BaseSettings containing model fields with their respective settings.
+
+    Returns
+    -------
+    dict[str, ConfigurableField]
+        A dictionary where the keys are field names and the values are ConfigurableField instances
+        with the field's id and name set based on the settings.
+
+    Notes
+    -----
+    Only fields with a non-None title in their settings are included in the returned dictionary.
+    """
+    _fields = {}
+    for field_name in settings.model_fields:
+        settings_of_interest = settings.model_fields[field_name]
+        if settings_of_interest.title is not None:
+            _fields[field_name] = ConfigurableField(id=field_name, name=settings_of_interest.title)
+    return _fields
+
+
+def llm_provider(settings: BaseSettings, llm_cls: Type[LLM] = Ollama) -> LLM:
+    """
+    Create an instance of an LLM provider based on the given settings and class type.
+
+    Parameters
+    ----------
+    settings : BaseSettings
+        Configuration settings for the LLM.
+    llm_cls : Type[LLM], optional
+        The class type of the LLM to instantiate (default Ollama).
+
+    Returns
+    -------
+    LLM
+        An instance of the specified language model provider.
+    """
+    provider = _generic_llm_factory(llm_cls, get_configurable_fields_from(settings))
+    return provider(settings)
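Only fields with a `title` become runtime-configurable, which is how the Ollama settings expose `top_k`, `top_p`, and `temperature` but not `model` or `base_url`. A quick check, as a sketch:

```python
from langchain_ocr_lib.impl.llms.llm_factory import get_configurable_fields_from
from langchain_ocr_lib.impl.settings.ollama_chat_settings import OllamaSettings

fields = get_configurable_fields_from(OllamaSettings())
print(sorted(fields))  # ['temperature', 'top_k', 'top_p'] since only titled fields qualify
```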
langchain_ocr_lib/impl/llms/llm_type.py
@@ -0,0 +1,11 @@
+"""Module containing the Large Language Model (LLM) type enum class."""
+
+from enum import StrEnum, unique
+
+
+@unique
+class LLMType(StrEnum):
+    """Enum class representing different types of Large Language Models (LLMs)."""
+
+    OLLAMA = "ollama"
+    OPENAI = "openai"
langchain_ocr_lib/impl/settings/__init__.py
File without changes
langchain_ocr_lib/impl/settings/langfuse_settings.py
@@ -0,0 +1,29 @@
+"""Contains settings regarding Langfuse."""
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class LangfuseSettings(BaseSettings):
+    """
+    Contains settings regarding Langfuse.
+
+    Attributes
+    ----------
+    secret_key : str
+        The secret key for Langfuse.
+    public_key : str
+        The public key for Langfuse.
+    host : str
+        The host for Langfuse.
+    """
+
+    class Config:
+        """Config class for reading Fields from env."""
+
+        env_prefix = "LANGFUSE_"
+        case_sensitive = False
+
+    secret_key: str = Field(default="", description="The secret key for Langfuse.")
+    public_key: str = Field(default="", description="The public key for Langfuse.")
+    host: str = Field(default="https://api.langchain.com", description="The host for Langfuse.")
langchain_ocr_lib/impl/settings/language_settings.py
@@ -0,0 +1,25 @@
+"""Module containing the LanguageSettings class."""
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class LanguageSettings(BaseSettings):
+    """
+    Contains settings regarding the language used for OCR.
+
+    Attributes
+    ----------
+    language : str
+        The language to use for OCR.
+    """
+
+    class Config:
+        """Config class for reading fields from environment variables."""
+
+        env_prefix = "OCR_"
+        case_sensitive = False
+
+    language: str = Field(
+        default="en", description="The language in iso 639-1 format, e.g. 'en' for English, 'de' for German, etc."
+    )
langchain_ocr_lib/impl/settings/llm_class_type_settings.py
@@ -0,0 +1,27 @@
+"""Module for the LLM class type settings."""
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+from langchain_ocr_lib.impl.llms.llm_type import LLMType
+
+
+class LlmClassTypeSettings(BaseSettings):
+    """Settings for the LLM class type.
+
+    Attributes
+    ----------
+    llm_type : LLMType
+        The type of LLM to use. Defaults to LLMType.OLLAMA.
+
+    """
+
+    class Config:
+        """Config class for reading Fields from env."""
+
+        env_prefix = "RAG_CLASS_TYPE_"
+        case_sensitive = False
+
+    llm_type: LLMType = Field(
+        default=LLMType.OLLAMA,
+    )
langchain_ocr_lib/impl/settings/ollama_chat_settings.py
@@ -0,0 +1,42 @@
+"""Module that contains settings regarding the LLM."""
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class OllamaSettings(BaseSettings):
+    """
+    Contains settings regarding the LLM.
+
+    Attributes
+    ----------
+    model : str
+        The model name to be used.
+    base_url : str
+        The base URL for the LLM.
+    top_k : int
+        Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers,
+        while a lower value (e.g. 10) will be more conservative.
+    top_p : float
+        Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text,
+        while a lower value (e.g., 0.5) will generate more focused and conservative text.
+    temperature : float
+        The temperature of the model. Increasing the temperature will make the model answer more creatively.
+
+    Notes
+    -----
+    If the title of a field is provided, the field will be configurable in the Langfuse UI.
+    The field names should match the names of the attributes in the corresponding LLM class!
+    """
+
+    class Config:
+        """Config class for reading Fields from env."""
+
+        env_prefix = "OLLAMA_"
+        case_sensitive = False
+
+    model: str = Field(default="gemma3:4b-it-q4_K_M")
+    base_url: str = Field(default="http://localhost:11434")
+    top_k: int = Field(default=0, title="LLM Top K")
+    top_p: float = Field(default=0, title="LLM Top P")
+    temperature: float = Field(default=0, title="LLM Temperature")
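Because of the `OLLAMA_` prefix, every field can be overridden from the environment; a sketch (the model name shown is an arbitrary example, not a recommendation):

```python
import os

os.environ["OLLAMA_MODEL"] = "llava:13b"               # example value
os.environ["OLLAMA_BASE_URL"] = "http://ollama:11434"  # example host

from langchain_ocr_lib.impl.settings.ollama_chat_settings import OllamaSettings

settings = OllamaSettings()  # pydantic-settings reads the env at instantiation
assert settings.model == "llava:13b"
```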
langchain_ocr_lib/impl/settings/openai_chat_settings.py
@@ -0,0 +1,35 @@
+"""Module contains settings regarding the OpenAI API."""
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class OpenAISettings(BaseSettings):
+    """
+    Contains settings regarding the OpenAI API.
+
+    Attributes
+    ----------
+    model : str
+        The model identifier.
+    api_key : str
+        The API key for authentication.
+    top_p : float
+        Total probability mass of tokens to consider at each step.
+    temperature : float
+        What sampling temperature to use.
+    vision_capable : bool
+        Flag to enable a vision capable model.
+    """
+
+    class Config:
+        """Config class for reading fields from environment variables."""
+
+        env_prefix = "OPENAI_"
+        case_sensitive = False
+
+    model: str = Field(default="gpt-4o-mini-search-preview-2025-03-11", description="The model identifier")
+    api_key: str = Field(default="", description="The API key for authentication")
+    top_p: float = Field(default=1.0, description="Total probability mass of tokens to consider at each step")
+    temperature: float = Field(default=0.7, description="What sampling temperature to use")
+    vision_capable: bool = Field(default=False, description="Enable a vision capable model")
langchain_ocr_lib/impl/tracers/__init__.py
File without changes
langchain_ocr_lib/impl/tracers/langfuse_traced_chain.py
@@ -0,0 +1,44 @@
+"""Module for the LangfuseTracedChain class."""
+
+from typing import Optional
+
+import inject
+from langchain_core.runnables import RunnableConfig
+from langfuse.callback import CallbackHandler
+
+from langchain_ocr_lib.impl.settings.langfuse_settings import LangfuseSettings
+from langchain_ocr_lib.tracers.traced_chain import TracedChain
+
+
+class LangfuseTracedChain(TracedChain):
+    """A class to trace the execution of a Runnable using Langfuse.
+
+    This class wraps an inner Runnable and adds tracing capabilities using the Langfuse tracer.
+    It allows for the configuration of the tracer through the provided settings.
+
+    Attributes
+    ----------
+    CONFIG_CALLBACK_KEY : str
+        The key used to store callbacks in the configuration.
+    """
+
+    CONFIG_CALLBACK_KEY = "callbacks"
+    _inner_chain = inject.attr("OcrChain")
+
+    def __init__(self, settings: LangfuseSettings):
+        super().__init__()
+        self._settings = settings
+
+    def _add_tracing_callback(self, session_id: str, config: Optional[RunnableConfig]) -> RunnableConfig:
+        handler = CallbackHandler(
+            public_key=self._settings.public_key,
+            secret_key=self._settings.secret_key,
+            host=self._settings.host,
+            session_id=session_id,
+        )
+        if not config:
+            return RunnableConfig(callbacks=[handler])
+
+        current_callbacks = config.get(self.CONFIG_CALLBACK_KEY, [])
+        config[self.CONFIG_CALLBACK_KEY] = (current_callbacks if current_callbacks else []) + [handler]
+        return config
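`_add_tracing_callback` appends a per-invocation `CallbackHandler`; the session id comes from `config["metadata"]["session_id"]` (see `TracedChain._get_session_id` further below) or falls back to a random UUID per call. A sketch of passing one explicitly; "session-123" and the image payload are placeholders:

```python
import inject

from langchain_ocr_lib.di_config import configure_di

configure_di()
chain = inject.instance("LangfuseTracedChain")

# Group related invocations under one Langfuse session.
config = {"metadata": {"session_id": "session-123"}}  # placeholder session id
result = chain.invoke({"image_data": "<base64-encoded PNG>"}, config=config)
```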
langchain_ocr_lib/language_mapping/language_mapping.py
@@ -0,0 +1,19 @@
+"""Module to map language codes to language names using pycountry."""
+
+import pycountry
+
+
+def get_language_name_from_pycountry(code: str) -> str | None:
+    """Given a language abbreviation (ISO 639-1), return the full language name in English using pycountry, or None if the code is unknown."""
+    language = pycountry.languages.get(alpha_2=code.lower())
+    if language:
+        # Sometimes language.name may include extra parts, adjust as needed.
+        return language.name.lower()
+    return None
+
+
+# Example usage:
+if __name__ == "__main__":
+    lang_codes = ["en", "de", "ru", "it", "es", "zh", "ja", "fr"]
+    for lang_code in lang_codes:
+        print(f"pycountry: {lang_code} -> {get_language_name_from_pycountry(lang_code)}")
langchain_ocr_lib/main.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+"""Command-line interface for langchain-ocr-lib package."""
+
+import argparse
+import asyncio
+import os
+import sys
+from typing import Optional
+
+from langchain_ocr_lib.di_config import configure_di
+import inject
+from langchain_ocr_lib.impl.converter.image_converter import Image2MarkdownConverter
+from langchain_ocr_lib.impl.converter.pdf_converter import Pdf2MarkdownConverter
+
+
+def setup() -> None:
+    """Initialize the dependency injection configuration."""
+    configure_di()
+
+
+async def convert_image_file(file_path: str, output_file: Optional[str] = None) -> str:
+    """Convert an image file to markdown text.
+
+    Parameters
+    ----------
+    file_path : str
+        Path to the image file
+    output_file : Optional[str]
+        Path to save the markdown output, if None prints to stdout
+
+    Returns
+    -------
+    str
+        The markdown text
+    """
+    if not os.path.exists(file_path):
+        print(f"Error: File {file_path} not found", file=sys.stderr)
+        sys.exit(1)
+
+    converter = inject.instance(Image2MarkdownConverter)
+
+    # Pass the filename directly to the converter
+    result = await converter.aconvert2markdown(file=None, filename=file_path)
+
+    if output_file:
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(result)
+        print(f"Markdown saved to {output_file}")
+    else:
+        print(result)
+
+    return result
+
+
+async def convert_pdf_file(file_path: str, output_file: Optional[str] = None) -> str:
+    """Convert a PDF file to markdown text.
+
+    Parameters
+    ----------
+    file_path : str
+        Path to the PDF file
+    output_file : Optional[str]
+        Path to save the markdown output, if None prints to stdout
+
+    Returns
+    -------
+    str
+        The markdown text
+    """
+    if not os.path.exists(file_path):
+        print(f"Error: File {file_path} not found", file=sys.stderr)
+        sys.exit(1)
+
+    converter = inject.instance(Pdf2MarkdownConverter)
+
+    # Pass the filename directly to the converter
+    result = await converter.aconvert2markdown(file=None, filename=file_path)
+
+    if output_file:
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(result)
+        print(f"Markdown saved to {output_file}")
+    else:
+        print(result)
+
+    return result
+
+
+def main():
+    """Run the CLI application."""
+    parser = argparse.ArgumentParser(description="Convert images or PDFs to Markdown")
+    parser.add_argument("file", help="Path to the image or PDF file")
+    parser.add_argument("-o", "--output", help="Output file path (default: print to stdout)", default=None)
+    parser.add_argument(
+        "-t", "--type", choices=["auto", "image", "pdf"], default="auto", help="File type (default: auto-detect)"
+    )
+
+    args = parser.parse_args()
+
+    # Setup dependency injection
+    setup()
+
+    file_type = args.type
+    if file_type == "auto":
+        if args.file.lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp")):
+            file_type = "image"
+        elif args.file.lower().endswith(".pdf"):
+            file_type = "pdf"
+        else:
+            print(f"Error: Could not detect file type of {args.file}", file=sys.stderr)
+            sys.exit(1)
+
+    if file_type == "image":
+        asyncio.run(convert_image_file(args.file, args.output))
+    elif file_type == "pdf":
+        asyncio.run(convert_pdf_file(args.file, args.output))
+
+
+if __name__ == "__main__":
+    main()
+
+# langchain-ocr image.png -o output.md
langchain_ocr_lib/prompt_templates/__init__.py
File without changes
langchain_ocr_lib/prompt_templates/ocr_prompt.py
@@ -0,0 +1,60 @@
+"""Module for building the OCR prompt template."""
+
+import json
+
+from langchain_ocr_lib.language_mapping.language_mapping import get_language_name_from_pycountry
+
+
+def ocr_prompt_template_builder(language: str = "en", model_name: str = "") -> list[dict]:
+    system_prompt = f"""
+You are an advanced OCR tool. Your task is to extract all text content from this image in {get_language_name_from_pycountry(language)} **verbatim**, without any modifications, interpretations, summarizations, or omissions by keeping the original format in Markdown. **It is imperative that you do not add, infer, or hallucinate any content that is not explicitly present in the image.**
+
+**Requirements:** Adhere to the following guidelines:
+
+- **Headers:** Use Markdown headers (`#`, `##`, `###`, etc.) **only if corresponding heading structures are explicitly present in the image**. Match the level of the header accurately.
+- **Lists:** Preserve all original list formats (unordered lists using `-` or `*`, and ordered lists with numbers) **exactly as they appear** in the image. Maintain the original indentation.
+- **Text Formatting:** Retain all visual text formatting (bold, italics, underlines, strikethrough, etc.) using the appropriate Markdown syntax (`**bold**`, `*italic*`, `<u>underline</u>`, `~~strikethrough~~`). If a direct Markdown equivalent doesn't exist, prioritize accuracy of the text content.
+- **Code Blocks:** If code or preformatted text is detected (often with a distinct font or background), format it using Markdown code blocks (using triple backticks ```).
+- **Tables:** If tabular data is present, attempt to format it as a Markdown table using pipes `|` and hyphens `-`. If the table structure is complex, prioritize accurate text extraction over perfect table formatting.
+- **Spacing and Line Breaks:** Maintain original line breaks and spacing to preserve the layout as accurately as possible.
+
+**Additional Verification:**
+- After extraction, verify that every Markdown element (headers, lists, code blocks, tables, etc.) exactly reflects the appearance and structure in the image.
+- Ensure that no part of the content (including headers, footers, and any subtext) is omitted or altered.
+- If any element is ambiguous, replicate the original formatting as closely as possible.
+
+**Text Extraction:**
+- Extract all text content from the image, including headings, paragraphs, lists, tables, and any other textual elements.
+- Do **not omit** any part of the page.
+- Accurately replicate all visual formatting such as bold, italics, underlines, and other styles.
+
+**Example:**
+If the image contains the following text layout:
+------------------------------------------------
+# Chapter 1: Introduction
+
+Welcome to the document.
+
+**Key Points:**
+- Item 1
+- Item 2
+
+```python
+print("Hello, world!")
+```
+------------------------------------------------
+Then your output should be exactly as above, preserving the Markdown syntax for headers, bold text, lists, and code blocks.
+
+"""
+
+    if "llama3.2" in model_name:
+        system_prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>" + system_prompt + "<|eot_id|>"
+
+    ocr_prompt_template = [
+        {"role": "system", "content": system_prompt},
+        {
+            "role": "user",
+            "content": json.dumps([{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,{image_data}"}}]),
+        },
+    ]
+    return ocr_prompt_template
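Note that the builder returns a two-message list, not a `ChatPromptTemplate`: the user message content is JSON-encoded so it can round-trip through Langfuse and be re-parsed by `LangfuseManager.get_base_prompt`. A quick inspection sketch:

```python
from langchain_ocr_lib.prompt_templates.ocr_prompt import ocr_prompt_template_builder

template = ocr_prompt_template_builder(language="de", model_name="gemma3:4b-it-q4_K_M")
print(template[0]["role"])  # "system": instructions to extract text in german (mapped via pycountry)
print(template[1]["role"])  # "user": a JSON string holding the image_url part with {image_data}
```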
langchain_ocr_lib/tracers/__init__.py
File without changes
langchain_ocr_lib/tracers/traced_chain.py
@@ -0,0 +1,88 @@
+"""Module for the TracedChain class."""
+
+import uuid
+from abc import ABC, abstractmethod
+from typing import Any, Optional
+
+from langchain_core.runnables import RunnableConfig, ensure_config
+
+from langchain_ocr_lib.chains.chain import Chain
+
+RunnableInput = Any
+RunnableOutput = Any
+
+
+class TracedChain(Chain[RunnableInput, RunnableOutput], ABC):
+    """A class to represent a traced chain.
+
+    This class is designed to wrap around an inner Runnable chain and add tracing capabilities to it.
+    It provides methods to asynchronously invoke the chain with tracing and to manage session IDs and tracing callbacks.
+
+    Attributes
+    ----------
+    SESSION_ID_KEY : str
+        The key used to store the session ID in the metadata.
+    METADATA_KEY : str
+        The key used to store metadata.
+    """
+
+    SESSION_ID_KEY = "session_id"
+    METADATA_KEY = "metadata"
+
+    async def ainvoke(
+        self, chain_input: RunnableInput, config: Optional[RunnableConfig] = None, **kwargs: Any
+    ) -> RunnableOutput:
+        """
+        Asynchronously invoke the chain with the given input and configuration.
+
+        Parameters
+        ----------
+        chain_input : RunnableInput
+            The input to be processed by the chain.
+        config : Optional[RunnableConfig], optional
+            Configuration for the chain execution (default None).
+        **kwargs : Any
+            Additional keyword arguments.
+
+        Returns
+        -------
+        RunnableOutput
+            The output produced by the chain after processing the input.
+
+        """
+        config = ensure_config(config)
+        session_id = self._get_session_id(config)
+        config_with_tracing = self._add_tracing_callback(session_id, config)
+        return await self._inner_chain.ainvoke(chain_input, config=config_with_tracing)
+
+    def invoke(
+        self, chain_input: RunnableInput, config: Optional[RunnableConfig] = None, **kwargs: Any
+    ) -> RunnableOutput:
+        """
+        Invoke the chain with the given input and configuration.
+
+        Parameters
+        ----------
+        chain_input : RunnableInput
+            The input to be processed by the chain.
+        config : Optional[RunnableConfig], optional
+            Configuration for the chain execution (default None).
+        **kwargs : Any
+            Additional keyword arguments.
+
+        Returns
+        -------
+        RunnableOutput
+            The output produced by the chain after processing the input.
+        """
+        config = ensure_config(config)
+        session_id = self._get_session_id(config)
+        config_with_tracing = self._add_tracing_callback(session_id, config)
+        return self._inner_chain.invoke(chain_input, config=config_with_tracing)
+
+    @abstractmethod
+    def _add_tracing_callback(self, session_id: str, config: Optional[RunnableConfig]) -> RunnableConfig:
+        ...
+
+    def _get_session_id(self, config: Optional[RunnableConfig]) -> str:
+        return config.get(self.METADATA_KEY, {}).get(self.SESSION_ID_KEY, str(uuid.uuid4()))
langchain_ocr_lib-0.1.0.dist-info/METADATA
@@ -0,0 +1,28 @@
+Metadata-Version: 2.3
+Name: langchain-ocr-lib
+Version: 0.1.0
+Summary:
+License: MIT
+Author: Andreas Klos
+Author-email: aklos@outlook.de
+Requires-Python: >=3.11,<4.0
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: deprecated (>=1.2.14,<2.0.0)
+Requires-Dist: inject (>=5.2.1,<6.0.0)
+Requires-Dist: langchain-community (>=0.3.19,<0.4.0)
+Requires-Dist: langchain-ollama (>=0.2.0,<0.3.0)
+Requires-Dist: langchain-openai (>=0.3.8,<0.4.0)
+Requires-Dist: langfuse (>=2.59.7,<3.0.0)
+Requires-Dist: openai (>=1.42.0,<2.0.0)
+Requires-Dist: pdf2image (>=1.17.0,<2.0.0)
+Requires-Dist: pillow (>=11.0.0,<12.0.0)
+Requires-Dist: pycountry (>=24.6.1,<25.0.0)
+Requires-Dist: pytest-asyncio (>=0.25.0,<0.26.0)
+Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
+Description-Content-Type: text/markdown
+
+
langchain_ocr_lib-0.1.0.dist-info/RECORD
@@ -0,0 +1,35 @@
+langchain_ocr_lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/chains/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/chains/chain.py,sha256=D00wnm987YgkJsIAIwQVehX_B4kBOzrjistaPf1M0GE,1946
+langchain_ocr_lib/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/converter/converter.py,sha256=aJuaLX2942d8WRPNaU0cUPO1_266QE7Y6SVKpnxpGBA,1196
+langchain_ocr_lib/di_config.py,sha256=H1CxtSlzUH3QGkRFBQqgMGJZx5HGWQ0yrB2kEvFIbOk,3083
+langchain_ocr_lib/impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/impl/chains/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/impl/chains/ocr_chain.py,sha256=eGiflXVbo1UP56rRHPY6fB4woJtyIvv4SmXNC1RHWFY,2594
+langchain_ocr_lib/impl/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/impl/converter/image_converter.py,sha256=G1rDOCbudWNL4sDvSGJ7CeeFrWUblfWPGaZf5JsnpiM,2871
+langchain_ocr_lib/impl/converter/pdf_converter.py,sha256=ssj8DL_9wf6kMhjUhDkw0gwSwNLrvgh8nBRspwj60Vk,3510
+langchain_ocr_lib/impl/langfuse_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/impl/langfuse_manager/langfuse_manager.py,sha256=G_qGE_-LnPpNJYgkoDoVqoXYkwsaMkB_HN2uSng3YVA,5245
+langchain_ocr_lib/impl/llms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/impl/llms/llm_factory.py,sha256=9DsUdoYNrjeWLGA9ISDdHN2cxcQ7DquNQ5it6zSxHlg,2199
+langchain_ocr_lib/impl/llms/llm_type.py,sha256=_Ap7yStlBn0tyOyfVLH1c2j2A9-ccsTCxAm7bgoRQnM,268
+langchain_ocr_lib/impl/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/impl/settings/langfuse_settings.py,sha256=5lr3tVeiHXDUaYtWAnZPXrKxBJgM2wgaz7yyZThhCsE,812
+langchain_ocr_lib/impl/settings/language_settings.py,sha256=tdAC1t5wGu1MoH1jhjkDnxnX4Ui7giwxt7Qm8_LPkP8,627
+langchain_ocr_lib/impl/settings/llm_class_type_settings.py,sha256=4KC6zxby13wn38rB8055J8LNVTsmUfrOiyLtLuToHaM,598
+langchain_ocr_lib/impl/settings/ollama_chat_settings.py,sha256=8RWMsaK4qDrqC6Mrxekr8IEDYwcvjYwhw9xDwZemxI4,1506
+langchain_ocr_lib/impl/settings/openai_chat_settings.py,sha256=cXzxe-sea8VCK2M_u9ZIL4l8AR_YdhmA-phZa9fwf8o,1233
+langchain_ocr_lib/impl/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/impl/tracers/langfuse_traced_chain.py,sha256=lfYLEf9mJ2ie5wofHFG_FUicRi1281XGBC0GKXcAkHM,1546
+langchain_ocr_lib/language_mapping/language_mapping.py,sha256=VY7WkkZauoHNxkvgUYbig0rDmlKqDkz24cXMd6A7txM,700
+langchain_ocr_lib/main.py,sha256=_kx6pIsIV9pii2_TSYisFT4tKDQHMHef6buWhWoj11E,3485
+langchain_ocr_lib/prompt_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/prompt_templates/ocr_prompt.py,sha256=3Be1AL-HJkxPnAP0DNH1MqvAxFWTCeM5UOKP63xkHsY,3543
+langchain_ocr_lib/tracers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ocr_lib/tracers/traced_chain.py,sha256=uxRkdLNn_G6dAsti_gUuF7muhIj10xrOUL7HUga40oc,3056
+langchain_ocr_lib-0.1.0.dist-info/METADATA,sha256=2H3iEatfiflH4GcrFhIw2Cg8wjsgKsLoeP2irFsVTio,991
+langchain_ocr_lib-0.1.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+langchain_ocr_lib-0.1.0.dist-info/entry_points.txt,sha256=l4mIs0tnIgbJYuVveZySQKVBnqNMHS-8ZZtLwz8ag5k,61
+langchain_ocr_lib-0.1.0.dist-info/RECORD,,