just-bash 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- just_bash/__init__.py +55 -0
- just_bash/ast/__init__.py +213 -0
- just_bash/ast/factory.py +320 -0
- just_bash/ast/types.py +953 -0
- just_bash/bash.py +220 -0
- just_bash/commands/__init__.py +23 -0
- just_bash/commands/argv/__init__.py +5 -0
- just_bash/commands/argv/argv.py +21 -0
- just_bash/commands/awk/__init__.py +5 -0
- just_bash/commands/awk/awk.py +1168 -0
- just_bash/commands/base64/__init__.py +5 -0
- just_bash/commands/base64/base64.py +138 -0
- just_bash/commands/basename/__init__.py +5 -0
- just_bash/commands/basename/basename.py +72 -0
- just_bash/commands/bash/__init__.py +5 -0
- just_bash/commands/bash/bash.py +188 -0
- just_bash/commands/cat/__init__.py +5 -0
- just_bash/commands/cat/cat.py +173 -0
- just_bash/commands/checksum/__init__.py +5 -0
- just_bash/commands/checksum/checksum.py +179 -0
- just_bash/commands/chmod/__init__.py +5 -0
- just_bash/commands/chmod/chmod.py +216 -0
- just_bash/commands/column/__init__.py +5 -0
- just_bash/commands/column/column.py +180 -0
- just_bash/commands/comm/__init__.py +5 -0
- just_bash/commands/comm/comm.py +150 -0
- just_bash/commands/compression/__init__.py +5 -0
- just_bash/commands/compression/compression.py +298 -0
- just_bash/commands/cp/__init__.py +5 -0
- just_bash/commands/cp/cp.py +149 -0
- just_bash/commands/curl/__init__.py +5 -0
- just_bash/commands/curl/curl.py +801 -0
- just_bash/commands/cut/__init__.py +5 -0
- just_bash/commands/cut/cut.py +327 -0
- just_bash/commands/date/__init__.py +5 -0
- just_bash/commands/date/date.py +258 -0
- just_bash/commands/diff/__init__.py +5 -0
- just_bash/commands/diff/diff.py +118 -0
- just_bash/commands/dirname/__init__.py +5 -0
- just_bash/commands/dirname/dirname.py +56 -0
- just_bash/commands/du/__init__.py +5 -0
- just_bash/commands/du/du.py +150 -0
- just_bash/commands/echo/__init__.py +5 -0
- just_bash/commands/echo/echo.py +125 -0
- just_bash/commands/env/__init__.py +5 -0
- just_bash/commands/env/env.py +163 -0
- just_bash/commands/expand/__init__.py +5 -0
- just_bash/commands/expand/expand.py +299 -0
- just_bash/commands/expr/__init__.py +5 -0
- just_bash/commands/expr/expr.py +273 -0
- just_bash/commands/file/__init__.py +5 -0
- just_bash/commands/file/file.py +274 -0
- just_bash/commands/find/__init__.py +5 -0
- just_bash/commands/find/find.py +623 -0
- just_bash/commands/fold/__init__.py +5 -0
- just_bash/commands/fold/fold.py +160 -0
- just_bash/commands/grep/__init__.py +5 -0
- just_bash/commands/grep/grep.py +418 -0
- just_bash/commands/head/__init__.py +5 -0
- just_bash/commands/head/head.py +167 -0
- just_bash/commands/help/__init__.py +5 -0
- just_bash/commands/help/help.py +67 -0
- just_bash/commands/hostname/__init__.py +5 -0
- just_bash/commands/hostname/hostname.py +21 -0
- just_bash/commands/html_to_markdown/__init__.py +5 -0
- just_bash/commands/html_to_markdown/html_to_markdown.py +191 -0
- just_bash/commands/join/__init__.py +5 -0
- just_bash/commands/join/join.py +252 -0
- just_bash/commands/jq/__init__.py +5 -0
- just_bash/commands/jq/jq.py +280 -0
- just_bash/commands/ln/__init__.py +5 -0
- just_bash/commands/ln/ln.py +127 -0
- just_bash/commands/ls/__init__.py +5 -0
- just_bash/commands/ls/ls.py +280 -0
- just_bash/commands/mkdir/__init__.py +5 -0
- just_bash/commands/mkdir/mkdir.py +92 -0
- just_bash/commands/mv/__init__.py +5 -0
- just_bash/commands/mv/mv.py +142 -0
- just_bash/commands/nl/__init__.py +5 -0
- just_bash/commands/nl/nl.py +180 -0
- just_bash/commands/od/__init__.py +5 -0
- just_bash/commands/od/od.py +157 -0
- just_bash/commands/paste/__init__.py +5 -0
- just_bash/commands/paste/paste.py +100 -0
- just_bash/commands/printf/__init__.py +5 -0
- just_bash/commands/printf/printf.py +157 -0
- just_bash/commands/pwd/__init__.py +5 -0
- just_bash/commands/pwd/pwd.py +23 -0
- just_bash/commands/read/__init__.py +5 -0
- just_bash/commands/read/read.py +185 -0
- just_bash/commands/readlink/__init__.py +5 -0
- just_bash/commands/readlink/readlink.py +86 -0
- just_bash/commands/registry.py +844 -0
- just_bash/commands/rev/__init__.py +5 -0
- just_bash/commands/rev/rev.py +74 -0
- just_bash/commands/rg/__init__.py +5 -0
- just_bash/commands/rg/rg.py +1048 -0
- just_bash/commands/rm/__init__.py +5 -0
- just_bash/commands/rm/rm.py +106 -0
- just_bash/commands/search_engine/__init__.py +13 -0
- just_bash/commands/search_engine/matcher.py +170 -0
- just_bash/commands/search_engine/regex.py +159 -0
- just_bash/commands/sed/__init__.py +5 -0
- just_bash/commands/sed/sed.py +863 -0
- just_bash/commands/seq/__init__.py +5 -0
- just_bash/commands/seq/seq.py +190 -0
- just_bash/commands/shell/__init__.py +5 -0
- just_bash/commands/shell/shell.py +206 -0
- just_bash/commands/sleep/__init__.py +5 -0
- just_bash/commands/sleep/sleep.py +62 -0
- just_bash/commands/sort/__init__.py +5 -0
- just_bash/commands/sort/sort.py +411 -0
- just_bash/commands/split/__init__.py +5 -0
- just_bash/commands/split/split.py +237 -0
- just_bash/commands/sqlite3/__init__.py +5 -0
- just_bash/commands/sqlite3/sqlite3_cmd.py +505 -0
- just_bash/commands/stat/__init__.py +5 -0
- just_bash/commands/stat/stat.py +150 -0
- just_bash/commands/strings/__init__.py +5 -0
- just_bash/commands/strings/strings.py +150 -0
- just_bash/commands/tac/__init__.py +5 -0
- just_bash/commands/tac/tac.py +158 -0
- just_bash/commands/tail/__init__.py +5 -0
- just_bash/commands/tail/tail.py +180 -0
- just_bash/commands/tar/__init__.py +5 -0
- just_bash/commands/tar/tar.py +1067 -0
- just_bash/commands/tee/__init__.py +5 -0
- just_bash/commands/tee/tee.py +63 -0
- just_bash/commands/timeout/__init__.py +5 -0
- just_bash/commands/timeout/timeout.py +188 -0
- just_bash/commands/touch/__init__.py +5 -0
- just_bash/commands/touch/touch.py +91 -0
- just_bash/commands/tr/__init__.py +5 -0
- just_bash/commands/tr/tr.py +297 -0
- just_bash/commands/tree/__init__.py +5 -0
- just_bash/commands/tree/tree.py +139 -0
- just_bash/commands/true/__init__.py +5 -0
- just_bash/commands/true/true.py +32 -0
- just_bash/commands/uniq/__init__.py +5 -0
- just_bash/commands/uniq/uniq.py +323 -0
- just_bash/commands/wc/__init__.py +5 -0
- just_bash/commands/wc/wc.py +169 -0
- just_bash/commands/which/__init__.py +5 -0
- just_bash/commands/which/which.py +52 -0
- just_bash/commands/xan/__init__.py +5 -0
- just_bash/commands/xan/xan.py +1663 -0
- just_bash/commands/xargs/__init__.py +5 -0
- just_bash/commands/xargs/xargs.py +136 -0
- just_bash/commands/yq/__init__.py +5 -0
- just_bash/commands/yq/yq.py +848 -0
- just_bash/fs/__init__.py +29 -0
- just_bash/fs/in_memory_fs.py +621 -0
- just_bash/fs/mountable_fs.py +504 -0
- just_bash/fs/overlay_fs.py +894 -0
- just_bash/fs/read_write_fs.py +455 -0
- just_bash/interpreter/__init__.py +37 -0
- just_bash/interpreter/builtins/__init__.py +92 -0
- just_bash/interpreter/builtins/alias.py +154 -0
- just_bash/interpreter/builtins/cd.py +76 -0
- just_bash/interpreter/builtins/control.py +127 -0
- just_bash/interpreter/builtins/declare.py +336 -0
- just_bash/interpreter/builtins/export.py +56 -0
- just_bash/interpreter/builtins/let.py +44 -0
- just_bash/interpreter/builtins/local.py +57 -0
- just_bash/interpreter/builtins/mapfile.py +152 -0
- just_bash/interpreter/builtins/misc.py +378 -0
- just_bash/interpreter/builtins/readonly.py +80 -0
- just_bash/interpreter/builtins/set.py +234 -0
- just_bash/interpreter/builtins/shopt.py +201 -0
- just_bash/interpreter/builtins/source.py +136 -0
- just_bash/interpreter/builtins/test.py +290 -0
- just_bash/interpreter/builtins/unset.py +53 -0
- just_bash/interpreter/conditionals.py +387 -0
- just_bash/interpreter/control_flow.py +381 -0
- just_bash/interpreter/errors.py +116 -0
- just_bash/interpreter/expansion.py +1156 -0
- just_bash/interpreter/interpreter.py +813 -0
- just_bash/interpreter/types.py +134 -0
- just_bash/network/__init__.py +1 -0
- just_bash/parser/__init__.py +39 -0
- just_bash/parser/lexer.py +948 -0
- just_bash/parser/parser.py +2162 -0
- just_bash/py.typed +0 -0
- just_bash/query_engine/__init__.py +83 -0
- just_bash/query_engine/builtins/__init__.py +1283 -0
- just_bash/query_engine/evaluator.py +578 -0
- just_bash/query_engine/parser.py +525 -0
- just_bash/query_engine/tokenizer.py +329 -0
- just_bash/query_engine/types.py +373 -0
- just_bash/types.py +180 -0
- just_bash-0.1.5.dist-info/METADATA +410 -0
- just_bash-0.1.5.dist-info/RECORD +193 -0
- just_bash-0.1.5.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Head command implementation.
|
|
2
|
+
|
|
3
|
+
Usage: head [OPTION]... [FILE]...
|
|
4
|
+
|
|
5
|
+
Print the first 10 lines of each FILE to standard output.
|
|
6
|
+
With more than one FILE, precede each with a header giving the file name.
|
|
7
|
+
With no FILE, or when FILE is -, read standard input.
|
|
8
|
+
|
|
9
|
+
Options:
|
|
10
|
+
-n, --lines=NUM print the first NUM lines instead of the first 10
|
|
11
|
+
-c, --bytes=NUM print the first NUM bytes
|
|
12
|
+
-q, --quiet never print headers giving file names
|
|
13
|
+
-v, --verbose always print headers giving file names
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from ...types import CommandContext, ExecResult
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class HeadCommand:
    """The head command: print the leading lines (or characters) of each input.

    Supported arguments, as handled by ``execute``:

    * ``-n NUM`` / ``-nNUM`` / ``--lines=NUM`` / ``-NUM``: line count (default 10).
    * ``-c NUM`` / ``-cNUM`` / ``--bytes=NUM``: count of leading units to print
      instead of lines.
    * ``-q`` / ``--quiet``: never print ``==> name <==`` headers.
    * ``-v`` / ``--verbose``: always print headers.
    * ``--``: every following argument is a file operand.
    * ``-`` (or no operand at all): read ``ctx.stdin``.
    """

    name = "head"

    @staticmethod
    def _error(message: str) -> ExecResult:
        """Build a failing result (exit code 1) with one ``head:`` diagnostic."""
        return ExecResult(stdout="", stderr=f"head: {message}\n", exit_code=1)

    async def execute(self, args: list[str], ctx: CommandContext) -> ExecResult:
        """Execute the head command.

        Args:
            args: Command-line arguments, not including the command name.
            ctx: Execution context. This method reads ``ctx.stdin`` and
                ``ctx.cwd`` and calls ``ctx.fs.resolve_path`` and
                ``ctx.fs.read_file``.

        Returns:
            An ``ExecResult``. A malformed option aborts immediately with exit
            code 1 and empty stdout. A file that cannot be read adds a
            diagnostic to stderr and sets exit code 1, but the remaining
            operands are still processed.
        """
        num_lines = 10
        num_bytes = None
        quiet = False
        verbose = False
        files: list[str] = []

        # Parse arguments
        i = 0
        while i < len(args):
            arg = args[i]
            if arg == "--":
                files.extend(args[i + 1:])
                break

            # ``unit`` is set only for options that carry a count; ``text`` is
            # then the raw count string, converted once below.
            unit = None
            text = ""
            if arg.startswith("--lines="):
                unit, text = "lines", arg[8:]
            elif arg.startswith("--bytes="):
                unit, text = "bytes", arg[8:]
            elif arg in ("--quiet", "-q"):
                quiet = True
            elif arg in ("--verbose", "-v"):
                verbose = True
            elif arg.startswith(("-n", "-c")):
                unit = "lines" if arg[1] == "n" else "bytes"
                if len(arg) > 2:
                    # Value attached to the flag, e.g. ``-n5``.
                    text = arg[2:]
                else:
                    # Value is the next argument, e.g. ``-n 5``.
                    i += 1
                    if i >= len(args):
                        return self._error(
                            f"option requires an argument -- '{arg[1]}'"
                        )
                    text = args[i]
            elif arg.startswith("-") and len(arg) > 1:
                # ``-NUM`` shorthand; anything non-numeric is an unknown option.
                try:
                    num_lines = int(arg[1:])
                except ValueError:
                    return self._error(f"invalid option -- '{arg[1]}'")
            else:
                # Plain operand; a lone "-" lands here and means stdin.
                files.append(arg)

            if unit is not None:
                try:
                    count = int(text)
                except ValueError:
                    return self._error(f"invalid number of {unit}: '{text}'")
                if unit == "lines":
                    num_lines = count
                else:
                    num_bytes = count
            i += 1

        # Default to stdin
        if not files:
            files = ["-"]

        out_parts: list[str] = []
        err_parts: list[str] = []
        exit_code = 0
        show_headers = (len(files) > 1 and not quiet) or verbose
        # The blank separator line goes between headers that were actually
        # printed, so a leading operand that fails to open must not cause the
        # first real header to start with an empty line.
        header_printed = False

        for file in files:
            try:
                if file == "-":
                    # Treat absent stdin as empty input rather than failing.
                    content = ctx.stdin or ""
                else:
                    path = ctx.fs.resolve_path(ctx.cwd, file)
                    content = await ctx.fs.read_file(path)
            except FileNotFoundError:
                err_parts.append(
                    f"head: cannot open '{file}' for reading: No such file or directory\n"
                )
                exit_code = 1
                continue
            except IsADirectoryError:
                err_parts.append(f"head: error reading '{file}': Is a directory\n")
                exit_code = 1
                continue

            if show_headers:
                if header_printed:
                    out_parts.append("\n")
                out_parts.append(f"==> {file} <==\n")
                header_printed = True

            if num_bytes is not None:
                # Slices the content as read; NOTE(review): if read_file
                # returns str this counts characters, not encoded bytes.
                out_parts.append(content[:num_bytes])
                continue

            lines = content.split("\n")
            # A trailing newline yields one empty final element; drop it so it
            # is not counted as a line.
            if lines[-1] == "":
                lines = lines[:-1]
            # Plain slice semantics: a negative count drops that many lines
            # from the end instead of selecting from the start.
            selected = lines[:num_lines]
            if selected:
                # Every emitted line is newline-terminated, including a final
                # line that had no newline in the input.
                out_parts.append("\n".join(selected) + "\n")

        return ExecResult(
            stdout="".join(out_parts),
            stderr="".join(err_parts),
            exit_code=exit_code,
        )
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Help command implementation."""
|
|
2
|
+
|
|
3
|
+
from ..registry import COMMAND_NAMES
|
|
4
|
+
from ...types import CommandContext, ExecResult
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class HelpCommand:
    """The help command: list known commands or point at a command's own --help."""

    name = "help"

    # Display grouping for the general listing. Only names that are also
    # present in COMMAND_NAMES are shown; a group with none is omitted.
    CATEGORIES = {
        "File operations": [
            "ls", "cat", "cp", "mv", "rm", "mkdir", "touch", "chmod", "ln",
            "readlink", "stat", "file",
        ],
        "Text processing": [
            "grep", "sed", "awk", "cut", "tr", "sort", "uniq", "head", "tail",
            "wc", "nl", "rev", "tac", "paste",
        ],
        "Search": ["find", "rg"],
        "Data processing": ["jq", "base64", "diff"],
        "Utilities": [
            "echo", "printf", "date", "sleep", "timeout", "seq", "expr",
            "xargs", "tee",
        ],
        "Path utilities": ["basename", "dirname", "pwd"],
        "Directory info": ["tree", "du"],
        "Environment": ["env", "printenv", "hostname"],
        "Shell": ["true", "false", "which", "help"],
    }

    async def execute(self, args: list[str], ctx: CommandContext) -> ExecResult:
        """Run ``help``.

        ``--help``/``-h`` anywhere in ``args`` prints the usage text. Otherwise
        the first argument not starting with ``-`` is looked up as a command
        name. With no such argument, the categorised command list is printed.
        ``ctx`` is not used.
        """
        if any(flag in args for flag in ("--help", "-h")):
            usage = "Usage: help [COMMAND]\n\nShow help for commands.\n"
            return ExecResult(stdout=usage, stderr="", exit_code=0)

        # The first non-option argument selects a specific command.
        topic = next((a for a in args if not a.startswith("-")), None)
        if topic is not None:
            return self._help_for_command(topic)

        # General overview: one heading, one comma-separated row and one blank
        # line per non-empty category.
        report = ["Available commands:", ""]
        for heading, members in self.CATEGORIES.items():
            present = [m for m in members if m in COMMAND_NAMES]
            if not present:
                continue
            report += [f"{heading}:", f" {', '.join(present)}", ""]
        report.append("Use 'COMMAND --help' for more information about a command.")

        return ExecResult(stdout="\n".join(report) + "\n", stderr="", exit_code=0)

    def _help_for_command(self, cmd: str) -> ExecResult:
        """Return the pointer text for ``cmd``, or exit code 127 if it is unknown."""
        if cmd in COMMAND_NAMES:
            # No per-command text is stored here; defer to the command's --help.
            hint = f"{cmd}: Use '{cmd} --help' for detailed usage information.\n"
            return ExecResult(stdout=hint, stderr="", exit_code=0)

        return ExecResult(
            stdout="",
            stderr=f"help: no help topics match '{cmd}'.\n",
            exit_code=127,
        )
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Hostname command implementation."""
|
|
2
|
+
|
|
3
|
+
from ...types import CommandContext, ExecResult
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class HostnameCommand:
    """The hostname command: always reports a fixed host name."""

    name = "hostname"

    async def execute(self, args: list[str], ctx: CommandContext) -> ExecResult:
        """Print the usage line when ``--help`` is given, else ``localhost``.

        All other arguments, and ``ctx``, are ignored. The exit code is always 0.
        """
        wants_usage = "--help" in args
        # The sandbox does not expose a real host name, so a constant is used.
        text = "Usage: hostname [OPTION]...\n" if wants_usage else "localhost\n"
        return ExecResult(stdout=text, stderr="", exit_code=0)
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""HTML to Markdown conversion command implementation."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from ...types import CommandContext, ExecResult
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class HtmlToMarkdownCommand:
    """The html-to-markdown command - convert HTML to Markdown.

    Reads HTML from a file operand or from ``ctx.stdin``, removes ``<script>``,
    ``<style>`` and ``<footer>`` elements together with their content, converts
    the rest with the optional ``markdownify`` package, then rewrites code
    fences and horizontal rules to the requested style.
    """

    name = "html-to-markdown"

    # Elements dropped (tag and content) before conversion.
    _STRIPPED_TAGS = ("script", "style", "footer")
    # A fence marker at the start of a line (opening fences may carry a
    # language suffix, so the rest of the line is not constrained).
    _FENCE_RE = re.compile(r"^```", flags=re.MULTILINE)
    # A line consisting solely of one of the horizontal-rule spellings.
    _HR_RE = re.compile(r"^(?:___|\*\*\*|---)$", flags=re.MULTILINE)

    async def execute(self, args: list[str], ctx: CommandContext) -> ExecResult:
        """Execute the html-to-markdown command.

        Args:
            args: Command-line arguments, not including the command name.
                Unrecognised options, and ``-b``/``-c``/``-r`` given as the
                last argument without a value, are ignored.
            ctx: Execution context. This method reads ``ctx.stdin`` and
                ``ctx.cwd`` and calls ``ctx.fs.resolve_path``, ``ctx.fs.exists``,
                ``ctx.fs.stat`` and ``ctx.fs.read_file``.

        Returns:
            An ``ExecResult`` with the Markdown on stdout and exit code 0, or a
            diagnostic on stderr and exit code 1 when ``markdownify`` is not
            installed, the input path is missing or is a directory, or the
            conversion raises.
        """
        # Imported lazily so the rest of the package works without the
        # optional dependency.
        try:
            from markdownify import markdownify
        except ImportError:
            return ExecResult(
                stdout="",
                stderr="html-to-markdown: markdownify package not installed\n",
                exit_code=1,
            )

        # Parse options
        bullet_char = "-"
        code_fence = "```"
        hr_style = "---"
        heading_style = "atx"
        file_path = None

        i = 0
        while i < len(args):
            arg = args[i]

            if arg == "--help" or arg == "-h":
                return ExecResult(
                    stdout=self._get_help(),
                    stderr="",
                    exit_code=0,
                )

            # Handle -b/--bullet
            if arg == "-b" and i + 1 < len(args):
                bullet_char = args[i + 1]
                i += 2
                continue
            if arg.startswith("--bullet="):
                bullet_char = arg[9:]
                i += 1
                continue

            # Handle -c/--code
            if arg == "-c" and i + 1 < len(args):
                code_fence = args[i + 1]
                i += 2
                continue
            if arg.startswith("--code="):
                code_fence = arg[7:]
                i += 1
                continue

            # Handle -r/--hr
            if arg == "-r" and i + 1 < len(args):
                hr_style = args[i + 1]
                i += 2
                continue
            if arg.startswith("--hr="):
                hr_style = arg[5:]
                i += 1
                continue

            # Handle --heading-style
            if arg.startswith("--heading-style="):
                heading_style = arg[16:]
                i += 1
                continue

            # Non-option argument is the file path (the last one wins)
            if not arg.startswith("-") or arg == "-":
                file_path = arg
                i += 1
                continue

            # Anything else is silently skipped.
            i += 1

        # Get input content
        if file_path is None or file_path == "-":
            # Read from stdin
            html_content = ctx.stdin or ""
        else:
            # Resolve through the filesystem, as the other file-reading
            # commands do, instead of concatenating strings by hand.
            abs_path = ctx.fs.resolve_path(ctx.cwd, file_path)
            if not await ctx.fs.exists(abs_path):
                return ExecResult(
                    stdout="",
                    stderr=f"html-to-markdown: {file_path}: No such file or directory\n",
                    exit_code=1,
                )
            stat = await ctx.fs.stat(abs_path)
            if stat and stat.is_directory:
                return ExecResult(
                    stdout="",
                    stderr=f"html-to-markdown: {file_path}: Is a directory\n",
                    exit_code=1,
                )
            html_content = await ctx.fs.read_file(abs_path)

        # Strip script, style, and footer elements before conversion
        html_content = self._strip_elements(html_content)

        # markdownify names the setext style "underlined"; passing "setext"
        # through unchanged matches none of its styles, so the documented
        # option would have no effect.
        if heading_style.lower() == "setext":
            md_heading_style = "underlined"
        elif heading_style == "atx":
            md_heading_style = heading_style.upper()
        else:
            md_heading_style = heading_style

        # Convert HTML to Markdown
        try:
            markdown = markdownify(
                html_content,
                bullets=bullet_char,
                code_language_callback=None,
                heading_style=md_heading_style,
                strip=list(self._STRIPPED_TAGS),
            )

            # Post-process: apply code fence style and hr style
            markdown = self._apply_code_fence(markdown, code_fence)
            markdown = self._apply_hr_style(markdown, hr_style)

            # Trim and ensure trailing newline
            result = markdown.strip()
            if result:
                result += "\n"

            return ExecResult(stdout=result, stderr="", exit_code=0)

        except Exception as e:
            # Boundary around the third-party converter: report, do not raise.
            return ExecResult(
                stdout="",
                stderr=f"html-to-markdown: conversion error: {e}\n",
                exit_code=1,
            )

    def _resolve_path(self, path: str, cwd: str) -> str:
        """Join ``path`` onto ``cwd`` unless ``path`` is already absolute.

        No normalisation is performed. ``execute`` no longer calls this; it is
        kept in case other code does.
        """
        if path.startswith("/"):
            return path
        if cwd.endswith("/"):
            return cwd + path
        return cwd + "/" + path

    def _strip_elements(self, html: str) -> str:
        """Strip script, style, and footer elements (tags and content) from HTML."""
        for tag in self._STRIPPED_TAGS:
            # Non-greedy and DOTALL so each element is removed individually
            # even when it spans several lines.
            html = re.sub(
                rf"<{tag}[^>]*>.*?</{tag}>",
                "",
                html,
                flags=re.DOTALL | re.IGNORECASE,
            )
        return html

    def _apply_code_fence(self, markdown: str, fence: str) -> str:
        """Replace every line-leading ``` marker with ``fence``."""
        if fence == "```":
            return markdown
        # Callable replacement: ``fence`` is user input and must be inserted
        # literally, not interpreted as a regex replacement template.
        return self._FENCE_RE.sub(lambda _match: fence, markdown)

    def _apply_hr_style(self, markdown: str, hr_style: str) -> str:
        """Replace lines that are exactly ___, *** or --- with ``hr_style``."""
        # Single pass with a callable replacement: ``hr_style`` is inserted
        # literally and text that was just inserted is never rescanned.
        return self._HR_RE.sub(lambda _match: hr_style, markdown)

    def _get_help(self) -> str:
        """Return help text."""
        return """Usage: html-to-markdown [OPTIONS] [FILE]

Convert HTML to Markdown.

Options:
-b, --bullet=CHAR Bullet character for unordered lists (default: -)
-c, --code=FENCE Code fence style (default: ```)
-r, --hr=STRING Horizontal rule string (default: ---)
--heading-style=STYLE Heading style: atx or setext (default: atx)
-h, --help Show this help message

If FILE is omitted or is -, read from standard input.
"""
|