meshagent-markitdown 0.0.37__tar.gz → 0.0.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of meshagent-markitdown might be problematic. Click here for more details.
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/CHANGELOG.md +6 -0
- {meshagent_markitdown-0.0.37/meshagent_markitdown.egg-info → meshagent_markitdown-0.0.39}/PKG-INFO +3 -3
- meshagent_markitdown-0.0.39/meshagent/markitdown/__init__.py +3 -0
- meshagent_markitdown-0.0.39/meshagent/markitdown/tools/__init__.py +3 -0
- meshagent_markitdown-0.0.39/meshagent/markitdown/tools/markitdown.py +119 -0
- meshagent_markitdown-0.0.39/meshagent/markitdown/version.py +1 -0
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39/meshagent_markitdown.egg-info}/PKG-INFO +3 -3
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/meshagent_markitdown.egg-info/requires.txt +2 -2
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/pyproject.toml +2 -2
- meshagent_markitdown-0.0.37/meshagent/markitdown/__init__.py +0 -1
- meshagent_markitdown-0.0.37/meshagent/markitdown/tools/__init__.py +0 -1
- meshagent_markitdown-0.0.37/meshagent/markitdown/tools/markitdown.py +0 -188
- meshagent_markitdown-0.0.37/meshagent/markitdown/version.py +0 -1
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/LICENSE +0 -0
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/MANIFEST.in +0 -0
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/README.md +0 -0
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/meshagent_markitdown.egg-info/SOURCES.txt +0 -0
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/meshagent_markitdown.egg-info/dependency_links.txt +0 -0
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/meshagent_markitdown.egg-info/top_level.txt +0 -0
- {meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39}/setup.cfg +0 -0
{meshagent_markitdown-0.0.37/meshagent_markitdown.egg-info → meshagent_markitdown-0.0.39}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: meshagent-markitdown
|
|
3
|
-
Version: 0.0.37
|
|
3
|
+
Version: 0.0.39
|
|
4
4
|
Summary: Markitdown support for Meshagent
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
Project-URL: Documentation, https://docs.meshagent.com
|
|
@@ -9,9 +9,9 @@ Project-URL: Source, https://www.meshagent.com
|
|
|
9
9
|
Requires-Python: >=3.12
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
|
-
Requires-Dist: pytest~=8.
|
|
12
|
+
Requires-Dist: pytest~=8.4
|
|
13
13
|
Requires-Dist: pytest-asyncio~=0.26
|
|
14
|
-
Requires-Dist: meshagent-api~=0.0.
|
|
14
|
+
Requires-Dist: meshagent-api~=0.0.39
|
|
15
15
|
Requires-Dist: openapi-core~=0.19
|
|
16
16
|
Requires-Dist: markitdown[docx,outlook,pdf,pptx,xlsx]~=0.1
|
|
17
17
|
Dynamic: license-file
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import aiohttp
|
|
2
|
+
import mimetypes
|
|
3
|
+
from typing import Optional
|
|
4
|
+
import os
|
|
5
|
+
from meshagent.api import EmptyResponse, FileResponse
|
|
6
|
+
from meshagent.tools import (
|
|
7
|
+
Tool,
|
|
8
|
+
ToolContext,
|
|
9
|
+
TextResponse,
|
|
10
|
+
get_bytes_from_url,
|
|
11
|
+
BlobStorage,
|
|
12
|
+
RemoteToolkit,
|
|
13
|
+
)
|
|
14
|
+
import logging
|
|
15
|
+
import asyncio
|
|
16
|
+
import aiofiles
|
|
17
|
+
import markitdown
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger("markitdown")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
supported_extensions = {
|
|
23
|
+
".pdf",
|
|
24
|
+
".docx",
|
|
25
|
+
".pptx",
|
|
26
|
+
".docx",
|
|
27
|
+
".heic",
|
|
28
|
+
".xlsx",
|
|
29
|
+
# TODO: actually supports more formats, do we want others?
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class FileMarkItDownTool(Tool):
|
|
34
|
+
def __init__(self):
|
|
35
|
+
super().__init__(
|
|
36
|
+
name="markitdown_from_file",
|
|
37
|
+
title="MarkItDown File Adapter",
|
|
38
|
+
description="Read the contents of a PDF or Office document from a file path",
|
|
39
|
+
input_schema={
|
|
40
|
+
"type": "object",
|
|
41
|
+
"additionalProperties": False,
|
|
42
|
+
"required": ["path"],
|
|
43
|
+
"properties": {"path": {"type": "string"}},
|
|
44
|
+
},
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
async def execute(self, *, context: ToolContext, path: str):
|
|
48
|
+
filename, ext = os.path.splitext(path)
|
|
49
|
+
if ext in supported_extensions:
|
|
50
|
+
file: FileResponse = await context.room.storage.download(path=path)
|
|
51
|
+
logger.info("adding office metadata for file: {path}".format(path=path))
|
|
52
|
+
async with aiofiles.tempfile.NamedTemporaryFile("wb", suffix=ext) as f:
|
|
53
|
+
await f.write(file.data)
|
|
54
|
+
logger.info("tmp: {path}".format(path=f.name))
|
|
55
|
+
converter = markitdown.MarkItDown()
|
|
56
|
+
|
|
57
|
+
def convert():
|
|
58
|
+
return converter.convert(f.name)
|
|
59
|
+
|
|
60
|
+
result = await asyncio.get_event_loop().run_in_executor(None, convert)
|
|
61
|
+
|
|
62
|
+
return TextResponse(text=result.text_content)
|
|
63
|
+
else:
|
|
64
|
+
return EmptyResponse()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class UrlMarkItDownTool(Tool):
|
|
68
|
+
def __init__(self, blob_storage: Optional[BlobStorage] = None):
|
|
69
|
+
super().__init__(
|
|
70
|
+
name="markitdown_from_url",
|
|
71
|
+
title="MarkItDown URL Adapter",
|
|
72
|
+
description="Read the contents of a PDF or Office document from a URL",
|
|
73
|
+
input_schema={
|
|
74
|
+
"type": "object",
|
|
75
|
+
"additionalProperties": False,
|
|
76
|
+
"required": ["url"],
|
|
77
|
+
"properties": {"url": {"type": "string"}},
|
|
78
|
+
},
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
self._blob_storage = blob_storage
|
|
82
|
+
self._session = aiohttp.ClientSession()
|
|
83
|
+
|
|
84
|
+
async def execute(self, *, context: ToolContext, url: str):
|
|
85
|
+
blob = await get_bytes_from_url(url=url, blob_storage=self._blob_storage)
|
|
86
|
+
|
|
87
|
+
ext = mimetypes.guess_extension(blob.mime_type)
|
|
88
|
+
if ext in supported_extensions:
|
|
89
|
+
async with aiofiles.tempfile.NamedTemporaryFile("wb", suffix=ext) as f:
|
|
90
|
+
# TODO: should protect against too large files with maximum file length?
|
|
91
|
+
await f.write(blob.data)
|
|
92
|
+
|
|
93
|
+
converter = markitdown.MarkItDown()
|
|
94
|
+
|
|
95
|
+
def convert():
|
|
96
|
+
return converter.convert(f.name)
|
|
97
|
+
|
|
98
|
+
result = await asyncio.get_event_loop().run_in_executor(None, convert)
|
|
99
|
+
|
|
100
|
+
return TextResponse(text=result.text_content)
|
|
101
|
+
else:
|
|
102
|
+
raise Exception(
|
|
103
|
+
"Unsupported file type, you cannot use this tool to retreive its content"
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class MarkItDownToolkit(RemoteToolkit):
|
|
108
|
+
def __init__(
|
|
109
|
+
self, blob_storage: Optional[BlobStorage] = None, name="meshagent.markitdown"
|
|
110
|
+
):
|
|
111
|
+
super().__init__(
|
|
112
|
+
name=name,
|
|
113
|
+
title="markitdown",
|
|
114
|
+
description="MarkItDown is a utility for converting various files to Markdown",
|
|
115
|
+
tools=[
|
|
116
|
+
FileMarkItDownTool(),
|
|
117
|
+
UrlMarkItDownTool(blob_storage=blob_storage),
|
|
118
|
+
],
|
|
119
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.39"
|
{meshagent_markitdown-0.0.37 → meshagent_markitdown-0.0.39/meshagent_markitdown.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: meshagent-markitdown
|
|
3
|
-
Version: 0.0.37
|
|
3
|
+
Version: 0.0.39
|
|
4
4
|
Summary: Markitdown support for Meshagent
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
Project-URL: Documentation, https://docs.meshagent.com
|
|
@@ -9,9 +9,9 @@ Project-URL: Source, https://www.meshagent.com
|
|
|
9
9
|
Requires-Python: >=3.12
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
|
-
Requires-Dist: pytest~=8.
|
|
12
|
+
Requires-Dist: pytest~=8.4
|
|
13
13
|
Requires-Dist: pytest-asyncio~=0.26
|
|
14
|
-
Requires-Dist: meshagent-api~=0.0.
|
|
14
|
+
Requires-Dist: meshagent-api~=0.0.39
|
|
15
15
|
Requires-Dist: openapi-core~=0.19
|
|
16
16
|
Requires-Dist: markitdown[docx,outlook,pdf,pptx,xlsx]~=0.1
|
|
17
17
|
Dynamic: license-file
|
|
@@ -10,9 +10,9 @@ requires-python = ">=3.12"
|
|
|
10
10
|
license = "Apache-2.0"
|
|
11
11
|
keywords = []
|
|
12
12
|
dependencies = [
|
|
13
|
-
"pytest~=8.
|
|
13
|
+
"pytest~=8.4",
|
|
14
14
|
"pytest-asyncio~=0.26",
|
|
15
|
-
"meshagent-api~=0.0.
|
|
15
|
+
"meshagent-api~=0.0.39",
|
|
16
16
|
"openapi-core~=0.19",
|
|
17
17
|
"markitdown[pptx,docx,xlsx,pdf,outlook]~=0.1"
|
|
18
18
|
]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .version import __version__
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .markitdown import MarkItDownToolkit
|
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
import fal_client
|
|
2
|
-
import aiohttp
|
|
3
|
-
import mimetypes
|
|
4
|
-
import uuid
|
|
5
|
-
import base64
|
|
6
|
-
from typing import Optional
|
|
7
|
-
from urllib.parse import urlparse
|
|
8
|
-
from openapi_core import OpenAPI
|
|
9
|
-
import json
|
|
10
|
-
import os
|
|
11
|
-
from meshagent.api import EmptyResponse, JsonResponse, FileResponse
|
|
12
|
-
from meshagent.tools import Tool, Toolkit, ToolContext, TextResponse, get_bytes_from_url, BlobStorage, RemoteToolkit
|
|
13
|
-
from copy import deepcopy
|
|
14
|
-
import logging
|
|
15
|
-
import urllib.parse
|
|
16
|
-
import asyncio
|
|
17
|
-
import aiofiles
|
|
18
|
-
import markitdown
|
|
19
|
-
|
|
20
|
-
logger = logging.getLogger("markitdown")
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
supported_extensions = {
|
|
25
|
-
".pdf",
|
|
26
|
-
".docx",
|
|
27
|
-
".pptx",
|
|
28
|
-
".docx",
|
|
29
|
-
".heic",
|
|
30
|
-
".xlsx",
|
|
31
|
-
# TODO: actually supports more formats, do we want others?
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
class FileMarkItDownTool(Tool):
|
|
35
|
-
def __init__(self):
|
|
36
|
-
super().__init__(
|
|
37
|
-
name = "markitdown_from_file",
|
|
38
|
-
title = "MarkItDown File Adapter",
|
|
39
|
-
description="Read the contents of a PDF or Office document from a file path",
|
|
40
|
-
input_schema = {
|
|
41
|
-
"type" : "object",
|
|
42
|
-
"additionalProperties" : False,
|
|
43
|
-
"required" : [ "path" ],
|
|
44
|
-
"properties" : {
|
|
45
|
-
"path" : {
|
|
46
|
-
"type" : "string"
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
})
|
|
50
|
-
|
|
51
|
-
async def execute(self, *, context: ToolContext, path: str):
|
|
52
|
-
|
|
53
|
-
filename, ext = os.path.splitext(path)
|
|
54
|
-
if ext in supported_extensions:
|
|
55
|
-
file : FileResponse = await context.room.storage.download(path=path)
|
|
56
|
-
logger.info("adding office metadata for file: {path}".format(path=path))
|
|
57
|
-
async with aiofiles.tempfile.NamedTemporaryFile('wb', suffix=ext) as f:
|
|
58
|
-
await f.write(file.data)
|
|
59
|
-
logger.info("tmp: {path}".format(path=f.name))
|
|
60
|
-
converter = markitdown.MarkItDown()
|
|
61
|
-
def convert():
|
|
62
|
-
return converter.convert(f.name)
|
|
63
|
-
|
|
64
|
-
result = await asyncio.get_event_loop().run_in_executor(None, convert)
|
|
65
|
-
|
|
66
|
-
return TextResponse(text=result.text_content)
|
|
67
|
-
else:
|
|
68
|
-
return EmptyResponse()
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class UrlMarkItDownTool(Tool):
|
|
72
|
-
def __init__(self, blob_storage: Optional[BlobStorage] = None):
|
|
73
|
-
super().__init__(
|
|
74
|
-
name = "markitdown_from_url",
|
|
75
|
-
title = "MarkItDown URL Adapter",
|
|
76
|
-
description = "Read the contents of a PDF or Office document from a URL",
|
|
77
|
-
input_schema = {
|
|
78
|
-
"type" : "object",
|
|
79
|
-
"additionalProperties" : False,
|
|
80
|
-
"required" : [ "url" ],
|
|
81
|
-
"properties" : {
|
|
82
|
-
"url" : {
|
|
83
|
-
"type" : "string"
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
})
|
|
87
|
-
|
|
88
|
-
self._blob_storage = blob_storage
|
|
89
|
-
self._session = aiohttp.ClientSession()
|
|
90
|
-
|
|
91
|
-
async def execute(self, *, context: ToolContext, url: str):
|
|
92
|
-
|
|
93
|
-
blob = await get_bytes_from_url(url=url, blob_storage=self._blob_storage)
|
|
94
|
-
|
|
95
|
-
ext = mimetypes.guess_extension(blob.mime_type)
|
|
96
|
-
if ext in supported_extensions:
|
|
97
|
-
async with aiofiles.tempfile.NamedTemporaryFile('wb', suffix=ext) as f:
|
|
98
|
-
|
|
99
|
-
# TODO: should protect against too large files with maximum file length?
|
|
100
|
-
await f.write(blob.data)
|
|
101
|
-
|
|
102
|
-
converter = markitdown.MarkItDown()
|
|
103
|
-
def convert():
|
|
104
|
-
return converter.convert(f.name)
|
|
105
|
-
|
|
106
|
-
result = await asyncio.get_event_loop().run_in_executor(None, convert)
|
|
107
|
-
|
|
108
|
-
return TextResponse(text=result.text_content)
|
|
109
|
-
else:
|
|
110
|
-
raise Exception("Unsupported file type, you cannot use this tool to retreive its content")
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
class AskUserMarkItDownTool(Tool):
|
|
114
|
-
def __init__(self):
|
|
115
|
-
super().__init__(
|
|
116
|
-
name = "markitdown_from_user",
|
|
117
|
-
title = "Read a file from a user",
|
|
118
|
-
description = "Read the contents of a PDF or Office document the user. Requires ask_user_file tool to be available at runtime",
|
|
119
|
-
input_schema = {
|
|
120
|
-
"type": "object",
|
|
121
|
-
"additionalProperties": False,
|
|
122
|
-
"required": ["title", "description"],
|
|
123
|
-
"properties": {
|
|
124
|
-
"title": {
|
|
125
|
-
"type": "string",
|
|
126
|
-
"description": "a very short description suitable for a dialog title",
|
|
127
|
-
},
|
|
128
|
-
"description": {
|
|
129
|
-
"type": "string",
|
|
130
|
-
"description":
|
|
131
|
-
"helpful information that explains why this information is being collected and how it will be used",
|
|
132
|
-
},
|
|
133
|
-
},
|
|
134
|
-
}
|
|
135
|
-
)
|
|
136
|
-
|
|
137
|
-
async def execute(self, *, context: ToolContext, title: str, description: str):
|
|
138
|
-
|
|
139
|
-
who = context.caller
|
|
140
|
-
if context.on_behalf_of != None:
|
|
141
|
-
who = context.on_behalf_of
|
|
142
|
-
|
|
143
|
-
file_response : FileResponse = await context.room.agents.invoke_tool(
|
|
144
|
-
participant_id=who.id,
|
|
145
|
-
toolkit="ui",
|
|
146
|
-
tool="ask_user_for_file",
|
|
147
|
-
arguments={
|
|
148
|
-
"title": title,
|
|
149
|
-
"description":description
|
|
150
|
-
}
|
|
151
|
-
)
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
ext = mimetypes.guess_extension(file_response.mime_type)
|
|
155
|
-
|
|
156
|
-
logger.info(f"got file: {file_response.mime_type} {ext}")
|
|
157
|
-
|
|
158
|
-
if ext in supported_extensions:
|
|
159
|
-
async with aiofiles.tempfile.NamedTemporaryFile('wb', suffix=ext) as f:
|
|
160
|
-
|
|
161
|
-
# TODO: should protect against too large files with maximum file length?
|
|
162
|
-
await f.write(file_response.data)
|
|
163
|
-
|
|
164
|
-
converter = markitdown.MarkItDown()
|
|
165
|
-
def convert():
|
|
166
|
-
return converter.convert(f.name)
|
|
167
|
-
|
|
168
|
-
result = await asyncio.get_event_loop().run_in_executor(None, convert)
|
|
169
|
-
|
|
170
|
-
return JsonResponse(json={
|
|
171
|
-
"filename" : file_response.name,
|
|
172
|
-
"mime_type" : file_response.mime_type,
|
|
173
|
-
"content" : result.text_content
|
|
174
|
-
})
|
|
175
|
-
else:
|
|
176
|
-
raise Exception("Unsupported file type, you cannot use this tool to retreive its content")
|
|
177
|
-
|
|
178
|
-
class MarkItDownToolkit(RemoteToolkit):
|
|
179
|
-
def __init__(self, blob_storage: Optional[BlobStorage] = None, name = "meshagent.markitdown"):
|
|
180
|
-
super().__init__(
|
|
181
|
-
name=name,
|
|
182
|
-
title="markitdown",
|
|
183
|
-
description="MarkItDown is a utility for converting various files to Markdown",
|
|
184
|
-
tools=[
|
|
185
|
-
FileMarkItDownTool(),
|
|
186
|
-
UrlMarkItDownTool(blob_storage=blob_storage),
|
|
187
|
-
AskUserMarkItDownTool(),
|
|
188
|
-
])
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.37"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|