meshagent-markitdown 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of meshagent-markitdown might be problematic. Click here for more details.

@@ -1 +1,3 @@
1
- from .version import __version__
1
+ from .version import __version__
2
+
3
+ __all__ = [__version__]
@@ -1 +1,3 @@
1
- from .markitdown import MarkItDownToolkit
1
+ from .markitdown import MarkItDownToolkit
2
+
3
+ __all__ = [MarkItDownToolkit]
@@ -1,18 +1,17 @@
1
- import fal_client
2
1
  import aiohttp
3
2
  import mimetypes
4
- import uuid
5
- import base64
6
3
  from typing import Optional
7
- from urllib.parse import urlparse
8
- from openapi_core import OpenAPI
9
- import json
10
4
  import os
11
5
  from meshagent.api import EmptyResponse, JsonResponse, FileResponse
12
- from meshagent.tools import Tool, Toolkit, ToolContext, TextResponse, get_bytes_from_url, BlobStorage, RemoteToolkit
13
- from copy import deepcopy
6
+ from meshagent.tools import (
7
+ Tool,
8
+ ToolContext,
9
+ TextResponse,
10
+ get_bytes_from_url,
11
+ BlobStorage,
12
+ RemoteToolkit,
13
+ )
14
14
  import logging
15
- import urllib.parse
16
15
  import asyncio
17
16
  import aiofiles
18
17
  import markitdown
@@ -20,44 +19,41 @@ import markitdown
20
19
  logger = logging.getLogger("markitdown")
21
20
 
22
21
 
23
-
24
- supported_extensions = {
25
- ".pdf",
26
- ".docx",
27
- ".pptx",
28
- ".docx",
29
- ".heic",
30
- ".xlsx",
31
- # TODO: actually supports more formats, do we want others?
22
+ supported_extensions = {
23
+ ".pdf",
24
+ ".docx",
25
+ ".pptx",
26
+ ".docx",
27
+ ".heic",
28
+ ".xlsx",
29
+ # TODO: actually supports more formats, do we want others?
32
30
  }
33
31
 
32
+
34
33
  class FileMarkItDownTool(Tool):
35
34
  def __init__(self):
36
35
  super().__init__(
37
- name = "markitdown_from_file",
38
- title = "MarkItDown File Adapter",
39
- description="Read the contents of a PDF or Office document from a file path",
40
- input_schema = {
41
- "type" : "object",
42
- "additionalProperties" : False,
43
- "required" : [ "path" ],
44
- "properties" : {
45
- "path" : {
46
- "type" : "string"
47
- }
48
- }
49
- })
50
-
51
- async def execute(self, *, context: ToolContext, path: str):
36
+ name="markitdown_from_file",
37
+ title="MarkItDown File Adapter",
38
+ description="Read the contents of a PDF or Office document from a file path",
39
+ input_schema={
40
+ "type": "object",
41
+ "additionalProperties": False,
42
+ "required": ["path"],
43
+ "properties": {"path": {"type": "string"}},
44
+ },
45
+ )
52
46
 
47
+ async def execute(self, *, context: ToolContext, path: str):
53
48
  filename, ext = os.path.splitext(path)
54
49
  if ext in supported_extensions:
55
- file : FileResponse = await context.room.storage.download(path=path)
50
+ file: FileResponse = await context.room.storage.download(path=path)
56
51
  logger.info("adding office metadata for file: {path}".format(path=path))
57
- async with aiofiles.tempfile.NamedTemporaryFile('wb', suffix=ext) as f:
52
+ async with aiofiles.tempfile.NamedTemporaryFile("wb", suffix=ext) as f:
58
53
  await f.write(file.data)
59
54
  logger.info("tmp: {path}".format(path=f.name))
60
55
  converter = markitdown.MarkItDown()
56
+
61
57
  def convert():
62
58
  return converter.convert(f.name)
63
59
 
@@ -69,54 +65,52 @@ class FileMarkItDownTool(Tool):
69
65
 
70
66
 
71
67
  class UrlMarkItDownTool(Tool):
72
- def __init__(self, blob_storage: Optional[BlobStorage] = None):
68
+ def __init__(self, blob_storage: Optional[BlobStorage] = None):
73
69
  super().__init__(
74
- name = "markitdown_from_url",
75
- title = "MarkItDown URL Adapter",
76
- description = "Read the contents of a PDF or Office document from a URL",
77
- input_schema = {
78
- "type" : "object",
79
- "additionalProperties" : False,
80
- "required" : [ "url" ],
81
- "properties" : {
82
- "url" : {
83
- "type" : "string"
84
- }
85
- }
86
- })
70
+ name="markitdown_from_url",
71
+ title="MarkItDown URL Adapter",
72
+ description="Read the contents of a PDF or Office document from a URL",
73
+ input_schema={
74
+ "type": "object",
75
+ "additionalProperties": False,
76
+ "required": ["url"],
77
+ "properties": {"url": {"type": "string"}},
78
+ },
79
+ )
87
80
 
88
81
  self._blob_storage = blob_storage
89
82
  self._session = aiohttp.ClientSession()
90
-
83
+
91
84
  async def execute(self, *, context: ToolContext, url: str):
92
-
93
85
  blob = await get_bytes_from_url(url=url, blob_storage=self._blob_storage)
94
-
86
+
95
87
  ext = mimetypes.guess_extension(blob.mime_type)
96
88
  if ext in supported_extensions:
97
- async with aiofiles.tempfile.NamedTemporaryFile('wb', suffix=ext) as f:
98
-
89
+ async with aiofiles.tempfile.NamedTemporaryFile("wb", suffix=ext) as f:
99
90
  # TODO: should protect against too large files with maximum file length?
100
91
  await f.write(blob.data)
101
-
92
+
102
93
  converter = markitdown.MarkItDown()
94
+
103
95
  def convert():
104
96
  return converter.convert(f.name)
105
97
 
106
- result = await asyncio.get_event_loop().run_in_executor(None, convert)
98
+ result = await asyncio.get_event_loop().run_in_executor(None, convert)
107
99
 
108
100
  return TextResponse(text=result.text_content)
109
101
  else:
110
- raise Exception("Unsupported file type, you cannot use this tool to retreive its content")
111
-
102
+ raise Exception(
103
+ "Unsupported file type, you cannot use this tool to retreive its content"
104
+ )
105
+
112
106
 
113
107
  class AskUserMarkItDownTool(Tool):
114
108
  def __init__(self):
115
109
  super().__init__(
116
- name = "markitdown_from_user",
117
- title = "Read a file from a user",
118
- description = "Read the contents of a PDF or Office document the user. Requires ask_user_file tool to be available at runtime",
119
- input_schema = {
110
+ name="markitdown_from_user",
111
+ title="Read a file from a user",
112
+ description="Read the contents of a PDF or Office document the user. Requires ask_user_file tool to be available at runtime",
113
+ input_schema={
120
114
  "type": "object",
121
115
  "additionalProperties": False,
122
116
  "required": ["title", "description"],
@@ -127,62 +121,64 @@ class AskUserMarkItDownTool(Tool):
127
121
  },
128
122
  "description": {
129
123
  "type": "string",
130
- "description":
131
- "helpful information that explains why this information is being collected and how it will be used",
124
+ "description": "helpful information that explains why this information is being collected and how it will be used",
132
125
  },
133
126
  },
134
- }
127
+ },
135
128
  )
136
-
129
+
137
130
  async def execute(self, *, context: ToolContext, title: str, description: str):
138
-
139
131
  who = context.caller
140
- if context.on_behalf_of != None:
132
+ if context.on_behalf_of is not None:
141
133
  who = context.on_behalf_of
142
134
 
143
- file_response : FileResponse = await context.room.agents.invoke_tool(
135
+ file_response: FileResponse = await context.room.agents.invoke_tool(
144
136
  participant_id=who.id,
145
137
  toolkit="ui",
146
138
  tool="ask_user_for_file",
147
- arguments={
148
- "title": title,
149
- "description":description
150
- }
139
+ arguments={"title": title, "description": description},
151
140
  )
152
141
 
153
-
154
142
  ext = mimetypes.guess_extension(file_response.mime_type)
155
143
 
156
144
  logger.info(f"got file: {file_response.mime_type} {ext}")
157
-
158
- if ext in supported_extensions:
159
- async with aiofiles.tempfile.NamedTemporaryFile('wb', suffix=ext) as f:
160
145
 
146
+ if ext in supported_extensions:
147
+ async with aiofiles.tempfile.NamedTemporaryFile("wb", suffix=ext) as f:
161
148
  # TODO: should protect against too large files with maximum file length?
162
149
  await f.write(file_response.data)
163
-
150
+
164
151
  converter = markitdown.MarkItDown()
152
+
165
153
  def convert():
166
154
  return converter.convert(f.name)
167
155
 
168
- result = await asyncio.get_event_loop().run_in_executor(None, convert)
156
+ result = await asyncio.get_event_loop().run_in_executor(None, convert)
169
157
 
170
- return JsonResponse(json={
171
- "filename" : file_response.name,
172
- "mime_type" : file_response.mime_type,
173
- "content" : result.text_content
174
- })
158
+ return JsonResponse(
159
+ json={
160
+ "filename": file_response.name,
161
+ "mime_type": file_response.mime_type,
162
+ "content": result.text_content,
163
+ }
164
+ )
175
165
  else:
176
- raise Exception("Unsupported file type, you cannot use this tool to retreive its content")
166
+ raise Exception(
167
+ "Unsupported file type, you cannot use this tool to retreive its content"
168
+ )
169
+
177
170
 
178
171
  class MarkItDownToolkit(RemoteToolkit):
179
- def __init__(self, blob_storage: Optional[BlobStorage] = None, name = "meshagent.markitdown"):
172
+ def __init__(
173
+ self, blob_storage: Optional[BlobStorage] = None, name="meshagent.markitdown"
174
+ ):
180
175
  super().__init__(
181
176
  name=name,
182
177
  title="markitdown",
183
- description="MarkItDown is a utility for converting various files to Markdown",
178
+ description="MarkItDown is a utility for converting various files to Markdown",
184
179
  tools=[
185
180
  FileMarkItDownTool(),
186
181
  UrlMarkItDownTool(blob_storage=blob_storage),
187
182
  AskUserMarkItDownTool(),
188
- ])
183
+ ],
184
+ )
@@ -1 +1 @@
1
- __version__ = "0.0.37"
1
+ __version__ = "0.0.38"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: meshagent-markitdown
3
- Version: 0.0.37
3
+ Version: 0.0.38
4
4
  Summary: Markitdown support for Meshagent
5
5
  License-Expression: Apache-2.0
6
6
  Project-URL: Documentation, https://docs.meshagent.com
@@ -9,9 +9,9 @@ Project-URL: Source, https://www.meshagent.com
9
9
  Requires-Python: >=3.12
10
10
  Description-Content-Type: text/markdown
11
11
  License-File: LICENSE
12
- Requires-Dist: pytest~=8.3
12
+ Requires-Dist: pytest~=8.4
13
13
  Requires-Dist: pytest-asyncio~=0.26
14
- Requires-Dist: meshagent-api~=0.0.37
14
+ Requires-Dist: meshagent-api~=0.0.38
15
15
  Requires-Dist: openapi-core~=0.19
16
16
  Requires-Dist: markitdown[docx,outlook,pdf,pptx,xlsx]~=0.1
17
17
  Dynamic: license-file
@@ -0,0 +1,9 @@
1
+ meshagent/markitdown/__init__.py,sha256=X78Z4yEg5XfkNKH0HiIdG4k1q5ktB-ampTuXHLNFrAw,58
2
+ meshagent/markitdown/version.py,sha256=R5QxTjVaID7odO0eBWpOnyCjNQxBZ7cpyruM_NMOoDc,23
3
+ meshagent/markitdown/tools/__init__.py,sha256=aIzTx7LQwjq3v189-WGXRyPH8GiTdt6mdtfW36Pqz2Q,73
4
+ meshagent/markitdown/tools/markitdown.py,sha256=D5nsR3QWkPoK3JYqvO1lNcjCn6HAv0LIuiE7oaVq2J4,6389
5
+ meshagent_markitdown-0.0.38.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
6
+ meshagent_markitdown-0.0.38.dist-info/METADATA,sha256=6IzfVOUu1dmKJlKeV3Ggf6sR-uNVfSwEQkNs9uIhf04,615
7
+ meshagent_markitdown-0.0.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
+ meshagent_markitdown-0.0.38.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
9
+ meshagent_markitdown-0.0.38.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- meshagent/markitdown/__init__.py,sha256=8zLGg-DfQhnDl2Ky0n-zXpN-8e-g7iR0AcaI4l4Vvpk,32
2
- meshagent/markitdown/version.py,sha256=JaGEpJ5xP3R4j7pGgCziGajlIRjy1_NJdv_OaXPQius,22
3
- meshagent/markitdown/tools/__init__.py,sha256=6oYMo_jdV0xupyJN_VLK7yFglPE438mhr8zD2z8k4j8,41
4
- meshagent/markitdown/tools/markitdown.py,sha256=ROOAJXoMKnrpaxU8goFOjiHR4904G1lKUHVb5kCs-SE,6761
5
- meshagent_markitdown-0.0.37.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
6
- meshagent_markitdown-0.0.37.dist-info/METADATA,sha256=UelsP54cHAgeuUFNjK71sP06ZKLaTJqrRJhhVklNprc,615
7
- meshagent_markitdown-0.0.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
- meshagent_markitdown-0.0.37.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
9
- meshagent_markitdown-0.0.37.dist-info/RECORD,,