flock-core 0.5.0b5-py3-none-any.whl → 0.5.0b6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flock-core might be problematic.
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/METADATA +2 -41
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/RECORD +5 -15
- flock/tools/__init__.py +0 -0
- flock/tools/azure_tools.py +0 -781
- flock/tools/code_tools.py +0 -167
- flock/tools/file_tools.py +0 -149
- flock/tools/github_tools.py +0 -157
- flock/tools/markdown_tools.py +0 -204
- flock/tools/system_tools.py +0 -9
- flock/tools/text_tools.py +0 -809
- flock/tools/web_tools.py +0 -90
- flock/tools/zendesk_tools.py +0 -147
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/WHEEL +0 -0
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/entry_points.txt +0 -0
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/licenses/LICENSE +0 -0
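The entire flock.tools package disappears in 0.5.0b6, so any code importing these modules will break on upgrade. A minimal defensive sketch (the fallback behavior is illustrative, not part of flock-core):

try:
    from flock.tools import file_tools  # removed in flock-core 0.5.0b6
except ImportError:
    file_tools = None  # pin flock-core==0.5.0b5 or vendor the module before upgrading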
flock/tools/code_tools.py
DELETED
@@ -1,167 +0,0 @@
from flock.core.interpreter.python_interpreter import PythonInterpreter
from flock.core.logging.trace_and_logged import traced_and_logged


@traced_and_logged
def code_evaluate_math(expression: str) -> float:
    try:
        result = PythonInterpreter(
            {},
            [
                "os",
                "math",
                "random",
                "datetime",
                "time",
                "string",
                "collections",
                "itertools",
                "functools",
                "typing",
                "enum",
                "json",
                "ast",
            ],
            verbose=True,
        ).execute(expression)
        return result
    except Exception:
        raise


@traced_and_logged
def code_code_eval(python_code: str) -> str:
    """A Python code evaluation tool that executes Python code and returns the result.

    The code must not be markdown-escaped with triple backticks.
    It is expected to be a valid Python code snippet that can be executed directly.
    The code is executed in a controlled environment with a limited set of libraries.
    It allows the use of the following libraries:
    "os", "math", "random", "datetime", "time", "string", "collections",
    "itertools", "functools", "typing", "enum", "json", "ast",
    "numpy", "sympy", "pandas", "httpx"
    """
    try:
        result = PythonInterpreter(
            {},
            [
                "os",
                "math",
                "random",
                "datetime",
                "time",
                "string",
                "collections",
                "itertools",
                "functools",
                "typing",
                "enum",
                "json",
                "ast",
                "numpy",
                "sympy",
                "pandas",
                # NOTE: the docstring above also lists "httpx", but it is
                # absent from this allow-list.
            ],
            verbose=True,
        ).execute(python_code)
        return result
    except Exception:
        raise


@traced_and_logged
def docker_code_execute(python_code: str) -> str:
    """Execute Python code in a sandboxed Docker container."""
    import ast
    import os
    import pathlib
    import platform
    import shutil
    import textwrap
    import uuid

    import docker

    def _auto_print_last_expr(code: str) -> str:
        """If the last top-level statement is a bare expression,
        append a print() so script mode surfaces its value.
        """
        tree = ast.parse(code, mode="exec")
        if tree.body and isinstance(tree.body[-1], ast.Expr):
            # Re-extract the exact source of that expression
            expr_src = textwrap.dedent(
                code.splitlines()[tree.body[-1].lineno - 1]
            )
            code += f"\nprint({expr_src})"
        return code

    # --- 1. Figure out a base directory that exists on this OS ----------
    if platform.system() == "Windows":
        base_dir = pathlib.Path(os.getenv("SANDBOX_BASE_DIR", r"C:\sandboxes"))
    else:  # Linux, macOS, WSL2
        base_dir = pathlib.Path(os.getenv("SANDBOX_BASE_DIR", "/var/sandboxes"))

    base_dir.mkdir(parents=True, exist_ok=True)

    sandbox_id = f"sbox-{uuid.uuid4()}"
    workdir = base_dir / sandbox_id
    workdir.mkdir(parents=True, exist_ok=False)

    # Docker's HTTP API always wants POSIX-style paths ("/", drive letter allowed).
    host_path = workdir.resolve().as_posix()  # e.g. "C:/sandboxes/…"

    client = docker.from_env()
    image = "python:3.12-slim"

    # --- 2. Decide whether we can / should request the gVisor runtime ---
    runtime_args = {}
    if platform.system() != "Windows" and shutil.which("runsc"):
        runtime_args["runtime"] = "runsc"  # gVisor on Linux & macOS

    container = client.containers.run(
        image,
        name=sandbox_id,
        command=["sleep", "infinity"],
        user="65534:65534",  # nobody
        network_mode="none",
        volumes={host_path: {"bind": "/workspace", "mode": "rw"}},
        mem_limit="4g",
        cpu_period=100_000,
        cpu_quota=200_000,  # 2 vCPU
        security_opt=["no-new-privileges"],
        detach=True,
        **runtime_args,
    )

    try:

        def exec_code(cmd: list[str], timeout: int = 30) -> str:
            # NOTE: `timeout` is accepted but never enforced below.
            exec_id = client.api.exec_create(
                container.id, cmd, workdir="/workspace"
            )["Id"]
            return client.api.exec_start(
                exec_id, stream=False, demux=False, tty=False,
            ).decode()

        # --- 3. Copy code in and execute --------------------------------
        (workdir / "main.py").write_text(
            _auto_print_last_expr(python_code), encoding="utf-8"
        )
        stdout = exec_code(["python", "main.py"], timeout=30)
        return stdout.strip()

    finally:
        # --- 4. Tear everything down ------------------------------------
        container.remove(force=True)
        shutil.rmtree(workdir, ignore_errors=True)
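For reference, a minimal usage sketch of the removed helpers, assuming flock-core==0.5.0b5 is installed and, for the last call, a running Docker daemon; the printed values are illustrative:

from flock.tools.code_tools import (
    code_code_eval,
    code_evaluate_math,
    docker_code_execute,
)

# In-process evaluation against the restricted stdlib allow-list.
print(code_evaluate_math("2 ** 10 + 1"))

# Same mechanism with the extended allow-list (numpy, sympy, pandas).
print(code_code_eval("import math\nmath.factorial(5)"))

# Fully isolated run: no network, 4 GB RAM, 2 vCPU, container removed afterwards.
print(docker_code_execute("sum(range(10))"))  # prints "45" via _auto_print_last_expr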
flock/tools/file_tools.py
DELETED
@@ -1,149 +0,0 @@
import importlib.util
import json
from typing import Any

from flock.core.logging.trace_and_logged import traced_and_logged


@traced_and_logged
def file_get_anything_as_markdown(url_or_file_path: str):
    if importlib.util.find_spec("docling") is not None:
        from docling.document_converter import DocumentConverter

        try:
            converter = DocumentConverter()
            result = converter.convert(url_or_file_path)
            markdown = result.document.export_to_markdown()
            return markdown
        except Exception:
            raise
    else:
        raise ImportError(
            "Optional tool dependencies not installed. Install with 'pip install flock-core[file-tools]'."
        )


@traced_and_logged
def file_append_to_file(content: str, filename: str):
    try:
        with open(filename, "a", encoding="utf-8") as f:
            f.write(content)
    except Exception:
        raise


@traced_and_logged
def file_save_to_file(content: str, filename: str):
    try:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)
    except Exception:
        raise


@traced_and_logged
def file_read_from_file(filename: str) -> str:
    with open(filename, encoding="utf-8") as file:
        return file.read()


@traced_and_logged
def file_json_parse_safe(text: str) -> dict:
    try:
        result = json.loads(text)
        return result
    except Exception:
        return {}


@traced_and_logged
def file_json_search(
    json_file_path: str, search_query: str, case_sensitive: bool = False
) -> list:
    """Search a JSON file for objects containing the specified search query.

    Args:
        json_file_path (str): Path to the JSON file to search
        search_query (str): Text to search for within the JSON objects
        case_sensitive (bool, optional): Whether to perform a case-sensitive search. Defaults to False.

    Returns:
        list: List of JSON objects (as dicts) that contain the search query

    Example:
        >>> matching_tickets = file_json_search("tickets.json", "error 404")
        >>> print(f"Found {len(matching_tickets)} tickets mentioning 'error 404'")
    """
    try:
        # Read the JSON file
        file_content = file_read_from_file(json_file_path)

        # Parse the JSON content
        json_data = file_json_parse_safe(file_content)

        # Convert search query to lowercase if case-insensitive search
        if not case_sensitive:
            search_query = search_query.lower()

        results = []

        # Determine if the JSON root is an object or array
        if isinstance(json_data, dict):
            # Handle case where root is a dictionary object
            for key, value in json_data.items():
                if isinstance(value, list):
                    # If this key contains a list of objects, search within them
                    matching_items = _search_in_list(
                        value, search_query, case_sensitive
                    )
                    results.extend(matching_items)
                elif _contains_text(value, search_query, case_sensitive):
                    # The entire object matches
                    results.append(json_data)
                    break
        elif isinstance(json_data, list):
            # Handle case where root is an array
            matching_items = _search_in_list(
                json_data, search_query, case_sensitive
            )
            results.extend(matching_items)

        return results

    except Exception as e:
        return [{"error": f"Error searching JSON file: {e!s}"}]


def _search_in_list(
    items: list, search_query: str, case_sensitive: bool
) -> list:
    """Helper function to search for text in a list of items."""
    matching_items = []
    for item in items:
        if _contains_text(item, search_query, case_sensitive):
            matching_items.append(item)
    return matching_items


def _contains_text(obj: Any, search_query: str, case_sensitive: bool) -> bool:
    """Recursively check if an object contains the search query in any of its string values."""
    if isinstance(obj, str):
        # For string values, check if they contain the search query
        if case_sensitive:
            return search_query in obj
        else:
            return search_query in obj.lower()
    elif isinstance(obj, dict):
        # For dictionaries, check each value
        for value in obj.values():
            if _contains_text(value, search_query, case_sensitive):
                return True
    elif isinstance(obj, list):
        # For lists, check each item
        for item in obj:
            if _contains_text(item, search_query, case_sensitive):
                return True
    # For other types (numbers, booleans, None), return False
    return False
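A short usage sketch of the removed JSON search helpers (the ticket data is made up for illustration):

import json

from flock.tools.file_tools import file_json_search, file_save_to_file

tickets = [
    {"id": 1, "summary": "Login page returns error 404"},
    {"id": 2, "summary": "Dashboard loads normally"},
]
file_save_to_file(json.dumps(tickets), "tickets.json")

# Case-insensitive by default, so "ERROR 404" still matches ticket 1.
matches = file_json_search("tickets.json", "ERROR 404")
print(len(matches))  # 1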
flock/tools/github_tools.py
DELETED
@@ -1,157 +0,0 @@
"""This module provides tools for interacting with GitHub repositories."""

import base64
import os

import httpx

from flock.core.logging.trace_and_logged import traced_and_logged


@traced_and_logged
def github_create_user_stories_as_github_issue(title: str, body: str) -> str:
    # NOTE: unlike the helpers below, this one expects GITHUB_REPO to be the
    # full "owner/repo" slug, since it is interpolated directly into the URL.
    github_pat = os.getenv("GITHUB_PAT")
    github_repo = os.getenv("GITHUB_REPO")

    url = f"https://api.github.com/repos/{github_repo}/issues"
    headers = {
        "Authorization": f"Bearer {github_pat}",
        "Accept": "application/vnd.github+json",
    }
    issue_title = title
    issue_body = body

    payload = {"title": issue_title, "body": issue_body}
    response = httpx.post(url, json=payload, headers=headers)

    if response.status_code == 201:
        return "Issue created successfully."
    else:
        return "Failed to create issue. Please try again later."


@traced_and_logged
def github_upload_readme(content: str):
    GITHUB_USERNAME = os.getenv("GITHUB_USERNAME")
    REPO_NAME = os.getenv("GITHUB_REPO")
    GITHUB_TOKEN = os.getenv("GITHUB_PAT")

    if not GITHUB_USERNAME or not REPO_NAME or not GITHUB_TOKEN:
        raise ValueError(
            "Missing environment variables: GITHUB_USERNAME, GITHUB_REPO, or GITHUB_PAT"
        )

    GITHUB_API_URL = f"https://api.github.com/repos/{GITHUB_USERNAME}/{REPO_NAME}/contents/README.md"

    encoded_content = base64.b64encode(content.encode()).decode()

    with httpx.Client() as client:
        # Fetch the current file (if any) to obtain its SHA, which the
        # contents API requires when updating an existing file.
        response = client.get(
            GITHUB_API_URL,
            headers={
                "Authorization": f"Bearer {GITHUB_TOKEN}",
                "Accept": "application/vnd.github.v3+json",
            },
        )

        data = response.json()
        sha = data.get("sha", None)

        payload = {
            "message": "Updating README.md",
            "content": encoded_content,
            "branch": "main",
        }

        if sha:
            payload["sha"] = sha

        response = client.put(
            GITHUB_API_URL,
            json=payload,
            headers={
                "Authorization": f"Bearer {GITHUB_TOKEN}",
                "Accept": "application/vnd.github.v3+json",
            },
        )

        if response.status_code in [200, 201]:
            print("README.md successfully uploaded/updated!")
        else:
            print("Failed to upload README.md:", response.json())


@traced_and_logged
def github_create_files(file_paths: list[str]) -> str:
    """Create multiple files in a GitHub repository with predefined content.

    This function iterates over a list of file paths (relative to the repository root) and creates
    each file in the specified GitHub repository with the content "#created by flock". For each file,
    it checks whether the file already exists; if it does, that file is skipped. The function
    uses the following environment variables for authentication and repository information:

    - GITHUB_USERNAME: Your GitHub username.
    - GITHUB_REPO: The name of the repository.
    - GITHUB_PAT: Your GitHub Personal Access Token for authentication.

    Parameters:
        file_paths (list of str): A list of file paths (relative to the repository root) to be created.

    Returns:
        str: A message indicating whether the files were created successfully or if there was a failure.
    """
    try:
        GITHUB_USERNAME = os.getenv("GITHUB_USERNAME")
        REPO_NAME = os.getenv("GITHUB_REPO")
        GITHUB_TOKEN = os.getenv("GITHUB_PAT")

        if not GITHUB_USERNAME or not REPO_NAME or not GITHUB_TOKEN:
            raise ValueError(
                "Missing environment variables: GITHUB_USERNAME, GITHUB_REPO, or GITHUB_PAT"
            )

        encoded_content = base64.b64encode(b"#created by flock").decode()

        with httpx.Client() as client:
            for file_path in file_paths:
                GITHUB_API_URL = f"https://api.github.com/repos/{GITHUB_USERNAME}/{REPO_NAME}/contents/{file_path}"

                response = client.get(
                    GITHUB_API_URL,
                    headers={
                        "Authorization": f"token {GITHUB_TOKEN}",
                        "Accept": "application/vnd.github.v3+json",
                    },
                )

                data = response.json()
                sha = data.get("sha", None)

                payload = {
                    "message": f"Creating {file_path}",
                    "content": encoded_content,
                    "branch": "main",
                }

                if sha:
                    print(f"Skipping {file_path}, file already exists.")
                    continue

                response = client.put(
                    GITHUB_API_URL,
                    json=payload,
                    headers={
                        "Authorization": f"token {GITHUB_TOKEN}",
                        "Accept": "application/vnd.github.v3+json",
                    },
                )

                if response.status_code in [200, 201]:
                    print(f"{file_path} successfully created!")
                else:
                    print(f"Failed to create {file_path}:", response.json())

        return "Files created successfully."

    except Exception:
        return "Failed to create files. Please try again later."
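A usage sketch for the removed GitHub helpers; the repository coordinates and token below are placeholders, and note that github_create_user_stories_as_github_issue reads GITHUB_REPO as an "owner/repo" slug while the other two expect the bare repository name:

import os

from flock.tools.github_tools import github_create_files, github_upload_readme

os.environ["GITHUB_USERNAME"] = "example-user"
os.environ["GITHUB_REPO"] = "example-repo"
os.environ["GITHUB_PAT"] = "ghp_..."  # placeholder personal access token

# Creates or updates README.md on the main branch.
github_upload_readme("# My Project\n\nGenerated by flock.")

# Creates stub files ("#created by flock"), skipping ones that already exist.
print(github_create_files(["docs/index.md", "src/__init__.py"]))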
flock/tools/markdown_tools.py
DELETED
@@ -1,204 +0,0 @@
import re
from typing import Any

from flock.core.logging.trace_and_logged import traced_and_logged


@traced_and_logged
def markdown_split_by_headers(
    markdown_text: str, min_header_level: int = 1, max_header_level: int = 2
) -> list[dict[str, Any]]:
    if not markdown_text:
        return []

    # Pattern to match headers from level min_header_level to max_header_level
    header_pattern = re.compile(
        rf"^(#{{{min_header_level},{max_header_level}}})\s+(.+)$",
        re.MULTILINE,
    )

    # Find all headers
    headers = list(header_pattern.finditer(markdown_text))

    if not headers:
        return [{"title": "Text", "content": markdown_text, "level": 0}]

    chunks = []

    # Process each section
    for i, current_header in enumerate(headers):
        header_text = current_header.group(2).strip()
        header_level = len(current_header.group(1))

        # Determine section content
        if i < len(headers) - 1:
            next_header_start = headers[i + 1].start()
            content = markdown_text[current_header.end() : next_header_start]
        else:
            content = markdown_text[current_header.end() :]

        chunks.append(
            {
                "title": header_text,
                "content": content.strip(),
                "level": header_level,
            }
        )

    # Check if there's content before the first header
    if headers[0].start() > 0:
        preamble = markdown_text[: headers[0].start()].strip()
        if preamble:
            chunks.insert(
                0, {"title": "Preamble", "content": preamble, "level": 0}
            )

    return chunks


@traced_and_logged
def markdown_extract_code_blocks(
    markdown_text: str, language: str | None = None
) -> list[dict[str, str]]:
    if not markdown_text:
        return []

    # Pattern to match markdown code blocks
    if language:
        # Match only code blocks with the specified language
        pattern = rf"```{language}\s*([\s\S]*?)\s*```"
    else:
        # Match all code blocks, capturing the language specifier if present
        pattern = r"```(\w*)\s*([\s\S]*?)\s*```"

    blocks = []

    if language:
        # If a language is specified, we only capture the code content
        matches = re.finditer(pattern, markdown_text)
        for match in matches:
            blocks.append(
                {"language": language, "code": match.group(1).strip()}
            )
    else:
        # If no language is specified, we capture both language and code content
        matches = re.finditer(pattern, markdown_text)
        for match in matches:
            lang = match.group(1).strip() if match.group(1) else "text"
            blocks.append({"language": lang, "code": match.group(2).strip()})

    return blocks


@traced_and_logged
def markdown_extract_links(markdown_text: str) -> list[dict[str, str]]:
    if not markdown_text:
        return []

    # Pattern to match markdown links [text](url)
    link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
    matches = link_pattern.findall(markdown_text)

    return [{"text": text, "url": url} for text, url in matches]


@traced_and_logged
def markdown_extract_tables(markdown_text: str) -> list[dict[str, Any]]:
    if not markdown_text:
        return []

    # Split the text by lines
    lines = markdown_text.split("\n")

    tables = []
    current_table = None
    header_row = None

    for line in lines:
        line = line.strip()

        # Table rows are indicated by starting with |
        if line.startswith("|") and line.endswith("|"):
            if current_table is None:
                current_table = []
                # This is the header row
                header_row = [
                    cell.strip() for cell in line.strip("|").split("|")
                ]
            elif "|--" in line or "|:-" in line:
                # This is the separator row, ignore it (assumes separators are
                # written without a space after the pipe, e.g. |---| or |:--|)
                pass
            else:
                # This is a data row
                row_data = [cell.strip() for cell in line.strip("|").split("|")]

                # Create a dictionary mapping headers to values
                row_dict = {}
                for i, header in enumerate(header_row):
                    if i < len(row_data):
                        row_dict[header] = row_data[i]
                    else:
                        row_dict[header] = ""

                current_table.append(row_dict)
        else:
            # End of table
            if current_table is not None:
                tables.append({"headers": header_row, "rows": current_table})
                current_table = None
                header_row = None

    # Don't forget to add the last table if we're at the end of the document
    if current_table is not None:
        tables.append({"headers": header_row, "rows": current_table})

    return tables


@traced_and_logged
def markdown_to_plain_text(markdown_text: str) -> str:
    if not markdown_text:
        return ""

    # Replace headers
    text = re.sub(r"^#{1,6}\s+(.+)$", r"\1", markdown_text, flags=re.MULTILINE)

    # Replace bold and italic
    text = re.sub(r"\*\*(.*?)\*\*", r"\1", text)
    text = re.sub(r"__(.*?)__", r"\1", text)
    text = re.sub(r"\*(.*?)\*", r"\1", text)
    text = re.sub(r"_(.*?)_", r"\1", text)

    # Replace links
    text = re.sub(r"\[(.*?)\]\((.*?)\)", r"\1 (\2)", text)

    # Replace code blocks
    text = re.sub(r"```(?:\w+)?\s*([\s\S]*?)\s*```", r"\1", text)
    text = re.sub(r"`([^`]*?)`", r"\1", text)

    # Replace bullet points
    text = re.sub(r"^[\*\-\+]\s+(.+)$", r"• \1", text, flags=re.MULTILINE)

    # Replace numbered lists (dropping the numbers)
    text = re.sub(r"^\d+\.\s+(.+)$", r"\1", text, flags=re.MULTILINE)

    # Replace blockquotes
    text = re.sub(r"^>\s+(.+)$", r"\1", text, flags=re.MULTILINE)

    # Remove HTML tags
    text = re.sub(r"<.*?>", "", text)

    # Normalize whitespace
    text = re.sub(r"\n{3,}", "\n\n", text)

    return text.strip()


@traced_and_logged
def extract_links_from_markdown(markdown: str, url: str) -> list:
    # Regular expression to find all markdown links
    link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
    links = link_pattern.findall(markdown)
    return [url + link[1] for link in links]
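A usage sketch of the removed markdown helpers on a small document:

from flock.tools.markdown_tools import (
    markdown_extract_code_blocks,
    markdown_split_by_headers,
)

doc = """# Guide

Intro text.

## Setup

```python
print("hello")
```
"""

sections = markdown_split_by_headers(doc)
print([(s["title"], s["level"]) for s in sections])
# [('Guide', 1), ('Setup', 2)]

print(markdown_extract_code_blocks(doc, language="python"))
# [{'language': 'python', 'code': 'print("hello")'}]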