meshagent-markitdown 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of meshagent-markitdown might be problematic. Click here for more details.
- meshagent/markitdown/__init__.py +3 -1
- meshagent/markitdown/tools/__init__.py +3 -1
- meshagent/markitdown/tools/markitdown.py +84 -88
- meshagent/markitdown/version.py +1 -1
- {meshagent_markitdown-0.0.37.dist-info → meshagent_markitdown-0.0.38.dist-info}/METADATA +3 -3
- meshagent_markitdown-0.0.38.dist-info/RECORD +9 -0
- meshagent_markitdown-0.0.37.dist-info/RECORD +0 -9
- {meshagent_markitdown-0.0.37.dist-info → meshagent_markitdown-0.0.38.dist-info}/WHEEL +0 -0
- {meshagent_markitdown-0.0.37.dist-info → meshagent_markitdown-0.0.38.dist-info}/licenses/LICENSE +0 -0
- {meshagent_markitdown-0.0.37.dist-info → meshagent_markitdown-0.0.38.dist-info}/top_level.txt +0 -0
meshagent/markitdown/__init__.py
CHANGED
|
@@ -1,18 +1,17 @@
|
|
|
1
|
-
import fal_client
|
|
2
1
|
import aiohttp
|
|
3
2
|
import mimetypes
|
|
4
|
-
import uuid
|
|
5
|
-
import base64
|
|
6
3
|
from typing import Optional
|
|
7
|
-
from urllib.parse import urlparse
|
|
8
|
-
from openapi_core import OpenAPI
|
|
9
|
-
import json
|
|
10
4
|
import os
|
|
11
5
|
from meshagent.api import EmptyResponse, JsonResponse, FileResponse
|
|
12
|
-
from meshagent.tools import
|
|
13
|
-
|
|
6
|
+
from meshagent.tools import (
|
|
7
|
+
Tool,
|
|
8
|
+
ToolContext,
|
|
9
|
+
TextResponse,
|
|
10
|
+
get_bytes_from_url,
|
|
11
|
+
BlobStorage,
|
|
12
|
+
RemoteToolkit,
|
|
13
|
+
)
|
|
14
14
|
import logging
|
|
15
|
-
import urllib.parse
|
|
16
15
|
import asyncio
|
|
17
16
|
import aiofiles
|
|
18
17
|
import markitdown
|
|
@@ -20,44 +19,41 @@ import markitdown
|
|
|
20
19
|
logger = logging.getLogger("markitdown")
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
# TODO: actually supports more formats, do we want others?
|
|
22
|
+
supported_extensions = {
|
|
23
|
+
".pdf",
|
|
24
|
+
".docx",
|
|
25
|
+
".pptx",
|
|
26
|
+
".docx",
|
|
27
|
+
".heic",
|
|
28
|
+
".xlsx",
|
|
29
|
+
# TODO: actually supports more formats, do we want others?
|
|
32
30
|
}
|
|
33
31
|
|
|
32
|
+
|
|
34
33
|
class FileMarkItDownTool(Tool):
|
|
35
34
|
def __init__(self):
|
|
36
35
|
super().__init__(
|
|
37
|
-
name
|
|
38
|
-
title
|
|
39
|
-
description="Read the contents of a PDF or Office document from a file path",
|
|
40
|
-
input_schema
|
|
41
|
-
"type"
|
|
42
|
-
"additionalProperties"
|
|
43
|
-
"required"
|
|
44
|
-
"properties" : {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
})
|
|
50
|
-
|
|
51
|
-
async def execute(self, *, context: ToolContext, path: str):
|
|
36
|
+
name="markitdown_from_file",
|
|
37
|
+
title="MarkItDown File Adapter",
|
|
38
|
+
description="Read the contents of a PDF or Office document from a file path",
|
|
39
|
+
input_schema={
|
|
40
|
+
"type": "object",
|
|
41
|
+
"additionalProperties": False,
|
|
42
|
+
"required": ["path"],
|
|
43
|
+
"properties": {"path": {"type": "string"}},
|
|
44
|
+
},
|
|
45
|
+
)
|
|
52
46
|
|
|
47
|
+
async def execute(self, *, context: ToolContext, path: str):
|
|
53
48
|
filename, ext = os.path.splitext(path)
|
|
54
49
|
if ext in supported_extensions:
|
|
55
|
-
file
|
|
50
|
+
file: FileResponse = await context.room.storage.download(path=path)
|
|
56
51
|
logger.info("adding office metadata for file: {path}".format(path=path))
|
|
57
|
-
async with aiofiles.tempfile.NamedTemporaryFile(
|
|
52
|
+
async with aiofiles.tempfile.NamedTemporaryFile("wb", suffix=ext) as f:
|
|
58
53
|
await f.write(file.data)
|
|
59
54
|
logger.info("tmp: {path}".format(path=f.name))
|
|
60
55
|
converter = markitdown.MarkItDown()
|
|
56
|
+
|
|
61
57
|
def convert():
|
|
62
58
|
return converter.convert(f.name)
|
|
63
59
|
|
|
@@ -69,54 +65,52 @@ class FileMarkItDownTool(Tool):
|
|
|
69
65
|
|
|
70
66
|
|
|
71
67
|
class UrlMarkItDownTool(Tool):
|
|
72
|
-
def __init__(self, blob_storage: Optional[BlobStorage]
|
|
68
|
+
def __init__(self, blob_storage: Optional[BlobStorage] = None):
|
|
73
69
|
super().__init__(
|
|
74
|
-
name
|
|
75
|
-
title
|
|
76
|
-
description
|
|
77
|
-
input_schema
|
|
78
|
-
"type"
|
|
79
|
-
"additionalProperties"
|
|
80
|
-
"required"
|
|
81
|
-
"properties" : {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
})
|
|
70
|
+
name="markitdown_from_url",
|
|
71
|
+
title="MarkItDown URL Adapter",
|
|
72
|
+
description="Read the contents of a PDF or Office document from a URL",
|
|
73
|
+
input_schema={
|
|
74
|
+
"type": "object",
|
|
75
|
+
"additionalProperties": False,
|
|
76
|
+
"required": ["url"],
|
|
77
|
+
"properties": {"url": {"type": "string"}},
|
|
78
|
+
},
|
|
79
|
+
)
|
|
87
80
|
|
|
88
81
|
self._blob_storage = blob_storage
|
|
89
82
|
self._session = aiohttp.ClientSession()
|
|
90
|
-
|
|
83
|
+
|
|
91
84
|
async def execute(self, *, context: ToolContext, url: str):
|
|
92
|
-
|
|
93
85
|
blob = await get_bytes_from_url(url=url, blob_storage=self._blob_storage)
|
|
94
|
-
|
|
86
|
+
|
|
95
87
|
ext = mimetypes.guess_extension(blob.mime_type)
|
|
96
88
|
if ext in supported_extensions:
|
|
97
|
-
async with aiofiles.tempfile.NamedTemporaryFile(
|
|
98
|
-
|
|
89
|
+
async with aiofiles.tempfile.NamedTemporaryFile("wb", suffix=ext) as f:
|
|
99
90
|
# TODO: should protect against too large files with maximum file length?
|
|
100
91
|
await f.write(blob.data)
|
|
101
|
-
|
|
92
|
+
|
|
102
93
|
converter = markitdown.MarkItDown()
|
|
94
|
+
|
|
103
95
|
def convert():
|
|
104
96
|
return converter.convert(f.name)
|
|
105
97
|
|
|
106
|
-
result = await
|
|
98
|
+
result = await asyncio.get_event_loop().run_in_executor(None, convert)
|
|
107
99
|
|
|
108
100
|
return TextResponse(text=result.text_content)
|
|
109
101
|
else:
|
|
110
|
-
raise Exception(
|
|
111
|
-
|
|
102
|
+
raise Exception(
|
|
103
|
+
"Unsupported file type, you cannot use this tool to retreive its content"
|
|
104
|
+
)
|
|
105
|
+
|
|
112
106
|
|
|
113
107
|
class AskUserMarkItDownTool(Tool):
|
|
114
108
|
def __init__(self):
|
|
115
109
|
super().__init__(
|
|
116
|
-
name
|
|
117
|
-
title
|
|
118
|
-
description
|
|
119
|
-
input_schema
|
|
110
|
+
name="markitdown_from_user",
|
|
111
|
+
title="Read a file from a user",
|
|
112
|
+
description="Read the contents of a PDF or Office document the user. Requires ask_user_file tool to be available at runtime",
|
|
113
|
+
input_schema={
|
|
120
114
|
"type": "object",
|
|
121
115
|
"additionalProperties": False,
|
|
122
116
|
"required": ["title", "description"],
|
|
@@ -127,62 +121,64 @@ class AskUserMarkItDownTool(Tool):
|
|
|
127
121
|
},
|
|
128
122
|
"description": {
|
|
129
123
|
"type": "string",
|
|
130
|
-
"description":
|
|
131
|
-
"helpful information that explains why this information is being collected and how it will be used",
|
|
124
|
+
"description": "helpful information that explains why this information is being collected and how it will be used",
|
|
132
125
|
},
|
|
133
126
|
},
|
|
134
|
-
}
|
|
127
|
+
},
|
|
135
128
|
)
|
|
136
|
-
|
|
129
|
+
|
|
137
130
|
async def execute(self, *, context: ToolContext, title: str, description: str):
|
|
138
|
-
|
|
139
131
|
who = context.caller
|
|
140
|
-
if context.on_behalf_of
|
|
132
|
+
if context.on_behalf_of is not None:
|
|
141
133
|
who = context.on_behalf_of
|
|
142
134
|
|
|
143
|
-
file_response
|
|
135
|
+
file_response: FileResponse = await context.room.agents.invoke_tool(
|
|
144
136
|
participant_id=who.id,
|
|
145
137
|
toolkit="ui",
|
|
146
138
|
tool="ask_user_for_file",
|
|
147
|
-
arguments={
|
|
148
|
-
"title": title,
|
|
149
|
-
"description":description
|
|
150
|
-
}
|
|
139
|
+
arguments={"title": title, "description": description},
|
|
151
140
|
)
|
|
152
141
|
|
|
153
|
-
|
|
154
142
|
ext = mimetypes.guess_extension(file_response.mime_type)
|
|
155
143
|
|
|
156
144
|
logger.info(f"got file: {file_response.mime_type} {ext}")
|
|
157
|
-
|
|
158
|
-
if ext in supported_extensions:
|
|
159
|
-
async with aiofiles.tempfile.NamedTemporaryFile('wb', suffix=ext) as f:
|
|
160
145
|
|
|
146
|
+
if ext in supported_extensions:
|
|
147
|
+
async with aiofiles.tempfile.NamedTemporaryFile("wb", suffix=ext) as f:
|
|
161
148
|
# TODO: should protect against too large files with maximum file length?
|
|
162
149
|
await f.write(file_response.data)
|
|
163
|
-
|
|
150
|
+
|
|
164
151
|
converter = markitdown.MarkItDown()
|
|
152
|
+
|
|
165
153
|
def convert():
|
|
166
154
|
return converter.convert(f.name)
|
|
167
155
|
|
|
168
|
-
result = await
|
|
156
|
+
result = await asyncio.get_event_loop().run_in_executor(None, convert)
|
|
169
157
|
|
|
170
|
-
return JsonResponse(
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
158
|
+
return JsonResponse(
|
|
159
|
+
json={
|
|
160
|
+
"filename": file_response.name,
|
|
161
|
+
"mime_type": file_response.mime_type,
|
|
162
|
+
"content": result.text_content,
|
|
163
|
+
}
|
|
164
|
+
)
|
|
175
165
|
else:
|
|
176
|
-
raise Exception(
|
|
166
|
+
raise Exception(
|
|
167
|
+
"Unsupported file type, you cannot use this tool to retreive its content"
|
|
168
|
+
)
|
|
169
|
+
|
|
177
170
|
|
|
178
171
|
class MarkItDownToolkit(RemoteToolkit):
|
|
179
|
-
def __init__(
|
|
172
|
+
def __init__(
|
|
173
|
+
self, blob_storage: Optional[BlobStorage] = None, name="meshagent.markitdown"
|
|
174
|
+
):
|
|
180
175
|
super().__init__(
|
|
181
176
|
name=name,
|
|
182
177
|
title="markitdown",
|
|
183
|
-
description="MarkItDown is a utility for converting various files to Markdown",
|
|
178
|
+
description="MarkItDown is a utility for converting various files to Markdown",
|
|
184
179
|
tools=[
|
|
185
180
|
FileMarkItDownTool(),
|
|
186
181
|
UrlMarkItDownTool(blob_storage=blob_storage),
|
|
187
182
|
AskUserMarkItDownTool(),
|
|
188
|
-
]
|
|
183
|
+
],
|
|
184
|
+
)
|
meshagent/markitdown/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.
|
|
1
|
+
__version__ = "0.0.38"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: meshagent-markitdown
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.38
|
|
4
4
|
Summary: Markitdown support for Meshagent
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
Project-URL: Documentation, https://docs.meshagent.com
|
|
@@ -9,9 +9,9 @@ Project-URL: Source, https://www.meshagent.com
|
|
|
9
9
|
Requires-Python: >=3.12
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
|
-
Requires-Dist: pytest~=8.
|
|
12
|
+
Requires-Dist: pytest~=8.4
|
|
13
13
|
Requires-Dist: pytest-asyncio~=0.26
|
|
14
|
-
Requires-Dist: meshagent-api~=0.0.
|
|
14
|
+
Requires-Dist: meshagent-api~=0.0.38
|
|
15
15
|
Requires-Dist: openapi-core~=0.19
|
|
16
16
|
Requires-Dist: markitdown[docx,outlook,pdf,pptx,xlsx]~=0.1
|
|
17
17
|
Dynamic: license-file
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
meshagent/markitdown/__init__.py,sha256=X78Z4yEg5XfkNKH0HiIdG4k1q5ktB-ampTuXHLNFrAw,58
|
|
2
|
+
meshagent/markitdown/version.py,sha256=R5QxTjVaID7odO0eBWpOnyCjNQxBZ7cpyruM_NMOoDc,23
|
|
3
|
+
meshagent/markitdown/tools/__init__.py,sha256=aIzTx7LQwjq3v189-WGXRyPH8GiTdt6mdtfW36Pqz2Q,73
|
|
4
|
+
meshagent/markitdown/tools/markitdown.py,sha256=D5nsR3QWkPoK3JYqvO1lNcjCn6HAv0LIuiE7oaVq2J4,6389
|
|
5
|
+
meshagent_markitdown-0.0.38.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
|
|
6
|
+
meshagent_markitdown-0.0.38.dist-info/METADATA,sha256=6IzfVOUu1dmKJlKeV3Ggf6sR-uNVfSwEQkNs9uIhf04,615
|
|
7
|
+
meshagent_markitdown-0.0.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
+
meshagent_markitdown-0.0.38.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
|
|
9
|
+
meshagent_markitdown-0.0.38.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
meshagent/markitdown/__init__.py,sha256=8zLGg-DfQhnDl2Ky0n-zXpN-8e-g7iR0AcaI4l4Vvpk,32
|
|
2
|
-
meshagent/markitdown/version.py,sha256=JaGEpJ5xP3R4j7pGgCziGajlIRjy1_NJdv_OaXPQius,22
|
|
3
|
-
meshagent/markitdown/tools/__init__.py,sha256=6oYMo_jdV0xupyJN_VLK7yFglPE438mhr8zD2z8k4j8,41
|
|
4
|
-
meshagent/markitdown/tools/markitdown.py,sha256=ROOAJXoMKnrpaxU8goFOjiHR4904G1lKUHVb5kCs-SE,6761
|
|
5
|
-
meshagent_markitdown-0.0.37.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
|
|
6
|
-
meshagent_markitdown-0.0.37.dist-info/METADATA,sha256=UelsP54cHAgeuUFNjK71sP06ZKLaTJqrRJhhVklNprc,615
|
|
7
|
-
meshagent_markitdown-0.0.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
-
meshagent_markitdown-0.0.37.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
|
|
9
|
-
meshagent_markitdown-0.0.37.dist-info/RECORD,,
|
|
File without changes
|
{meshagent_markitdown-0.0.37.dist-info → meshagent_markitdown-0.0.38.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{meshagent_markitdown-0.0.37.dist-info → meshagent_markitdown-0.0.38.dist-info}/top_level.txt
RENAMED
|
File without changes
|