@nataliapc/mcp-openmsx 1.2.10 → 1.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -2
- package/dist/chunker.js +187 -0
- package/dist/embedder.js +250 -0
- package/dist/server.js +6 -1
- package/dist/server_tools.js +6 -5
- package/dist/vectordb.js +94 -35
- package/package.json +4 -8
- package/resources/audio/chipsfmpacpr1_en.md +209 -0
- package/resources/audio/chipsfmpacpr2_en.md +170 -0
- package/resources/audio/toc.json +12 -0
- package/resources/book--msx-top-secret-3/MTS3-Appendix-English-Upd2.pdf +0 -0
- package/resources/book--msx-top-secret-3/MTS3-Complete-English.pdf +0 -0
- package/resources/book--msx-top-secret-3/mts3-appendix-english-upd2.md +25863 -0
- package/resources/book--msx-top-secret-3/mts3-complete-english.md +44895 -0
- package/resources/book--msx2-technical-handbook/toc.json +1 -1
- package/resources/book--the-msx-red-book/Chapter1_Programmable_Peripheral_Interface.md +112 -0
- package/resources/book--the-msx-red-book/Chapter2_Video_Display_Processor.md +308 -0
- package/resources/book--the-msx-red-book/Chapter3_Programmable_Sound_Generator.md +168 -0
- package/resources/book--the-msx-red-book/Chapter4_ROM_BIOS.md +2528 -0
- package/resources/book--the-msx-red-book/Chapter5_ROM_BASIC_Interpreter.md +3975 -0
- package/resources/book--the-msx-red-book/Chapter6_Memory_Map.md +1963 -0
- package/resources/book--the-msx-red-book/Chapter7_Machine_Code_Programs.md +1238 -0
- package/resources/book--the-msx-red-book/Introduction.md +104 -0
- package/resources/book--the-msx-red-book/toc.json +38 -3
- package/resources/processors/toc.json +3 -3
- package/resources/processors/z80-undocumented.md +141 -0
- package/resources/programming/asm_develop_a_program_in_cartridge_rom.md +1881 -0
- package/resources/programming/toc.json +6 -0
- package/resources/sdcc/1_Introduction.md +199 -0
- package/resources/sdcc/2_Installing_SDCC.md +533 -0
- package/resources/sdcc/3_Using_SDCC.md +1758 -0
- package/resources/sdcc/4_Notes_on_supported_Processors.md +1638 -0
- package/resources/sdcc/5_Debugging.md +210 -0
- package/resources/sdcc/6_Tips_and_Support.md +258 -0
- package/resources/sdcc/7_SDCC_Technical_Data.md +489 -0
- package/resources/sdcc/8_Compiler_internals.md +477 -0
- package/resources/sdcc/toc.json +44 -2
- package/resources/system/how_to_detect_ram.md +14 -0
- package/resources/system/mrc_wiki_megarom_mappers.md +533 -0
- package/resources/system/the_memory.md +118 -0
- package/resources/system/toc.json +18 -0
- package/vector-db/__manifest/_transactions/0-675ee228-bffb-4636-80e5-cdfde25cc4fe.txn +2 -0
- package/vector-db/__manifest/_versions/18446744073709551614.manifest +0 -0
- package/vector-db/__manifest/_versions/latest_version_hint.json +1 -0
- package/vector-db/msxdocs.lance/_indices/37194b01-2a25-40d1-ac38-7fbe254df5ea/metadata.lance +0 -0
- package/vector-db/msxdocs.lance/_indices/37194b01-2a25-40d1-ac38-7fbe254df5ea/part_2_docs.lance +0 -0
- package/vector-db/msxdocs.lance/_indices/37194b01-2a25-40d1-ac38-7fbe254df5ea/part_2_invert.lance +0 -0
- package/vector-db/msxdocs.lance/_indices/37194b01-2a25-40d1-ac38-7fbe254df5ea/part_2_tokens.lance +0 -0
- package/vector-db/msxdocs.lance/_transactions/0-dd155672-40e6-4c6a-942f-7fcbe8c3dbd0.txn +0 -0
- package/vector-db/msxdocs.lance/_transactions/1-e7230cbd-ce8e-465c-9b85-b91443862427.txn +0 -0
- package/vector-db/msxdocs.lance/_versions/18446744073709551613.manifest +0 -0
- package/vector-db/msxdocs.lance/_versions/18446744073709551614.manifest +0 -0
- package/vector-db/msxdocs.lance/_versions/latest_version_hint.json +1 -0
- package/vector-db/msxdocs.lance/data/000100110110001011110001fc578141d296825d0bea11c95d.lance +0 -0
- package/resources/book--the-msx-red-book/the_msx_red_book.md +0 -10349
- package/resources/processors/z80-undocumented.tex +0 -5617
- package/resources/sdcc/lyx2md.py +0 -745
- package/resources/sdcc/sdccman.lyx +0 -81574
- package/resources/sdcc/sdccman.md +0 -5557
- package/vector-db/index.json +0 -1
package/resources/sdcc/lyx2md.py
DELETED
|
@@ -1,745 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Convert a LyX file to Markdown, preserving math and formatting."""
|
|
3
|
-
|
|
4
|
-
import re
|
|
5
|
-
import sys
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def parse_lyx(filepath):
    """Load the LyX document at *filepath* and return its lines as a list.

    The file is decoded as UTF-8; undecodable bytes are substituted rather
    than raising. Each returned string keeps its trailing newline, if any.
    """
    with open(filepath, mode="r", encoding="utf-8", errors="replace") as handle:
        contents = list(handle)
    return contents
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class LyxToMarkdown:
    """Line-oriented converter from LyX source text to Markdown.

    The converter walks the document one line at a time. Text is accumulated
    in ``current_text`` for the layout (paragraph) currently open and emitted
    to ``output`` when the matching ``\\end_layout`` is reached. Layouts can
    nest through insets, so the enclosing layout's context and text are kept
    on ``layout_stack`` while an inner layout is being read.

    The character ``\\x01`` is used internally as a "soft space" placed after
    a closing emphasis marker: it becomes a real space when ordinary text
    follows, and is dropped when closing punctuation follows.
    """

    # current_text endings after which no space is inserted before an
    # opening emphasis marker.
    _NO_SPACE_BEFORE_MARKER = (" ", "\n", "(", "[", "\u201c", "\x01")
    # current_text endings after which no space is inserted before plain
    # text or inline math.
    _NO_SPACE_BEFORE_TEXT = (" ", "\n", "(", "[", "*")
    # Leading characters of a text line that must attach to what precedes.
    _ATTACHING_PUNCTUATION = ".,;:!?)-]"

    # directive -> (markdown marker, values that open it, values that close it)
    _FONT_DIRECTIVES = {
        "\\emph": ("*", ("on",), ("default", "off")),
        "\\series": ("**", ("bold",), ("default", "medium")),
        "\\shape": ("*", ("italic",), ("default", "up")),
    }

    # Layout names that map to a context by exact match.
    _EXACT_LAYOUTS = {
        "Title": "title",
        "Author": "author",
        "Section": "section",
        "Subsection": "subsection",
        "Subsubsection": "subsubsection",
        "Enumerate": "enumerate",
        "Itemize": "itemize",
        "Standard": "standard",
    }

    # Heading contexts and the Markdown prefix they are emitted with.
    _HEADING_MARKS = {
        "chapter": "##",
        "section": "###",
        "subsection": "####",
        "subsubsection": "#####",
    }

    # LaTeX wrappers stripped from display-math environments.
    _LATEX_ENV_PATTERNS = (
        r"\\begin\{eqnarray\*?\}",
        r"\\end\{eqnarray\*?\}",
        r"\\begin\{align\*?\}",
        r"\\end\{align\*?\}",
        r"\\begin\{equation\*?\}",
        r"\\end\{equation\*?\}",
        r"\\begin\{gathered\}",
        r"\\end\{gathered\}",
        r"\\begin\{array\}\{[^}]*\}",
        r"\\end\{array\}",
        r"\\label\{[^}]*\}",
    )

    def __init__(self):
        self.output = []  # emitted Markdown lines
        self.lines = []  # raw lines of the document being converted
        self.pos = 0  # index into self.lines of the line being processed
        self.in_body = False  # True between \begin_body and \end_body
        self.enumerate_counter = 0  # number used for the next Enumerate item
        self.label_map = {}  # label name -> heading/caption text
        self.current_section = ""  # text of the most recent heading
        # Stack of (layout_context, accumulated_text) for enclosing layouts
        self.layout_stack = []
        self.current_text = ""  # text accumulated for the open layout
        self._layout_context = "unknown"  # context of the open layout

    def convert(self, filepath):
        """Read the LyX file at *filepath* and return it as Markdown text."""
        return self.convert_lines(parse_lyx(filepath))

    def convert_lines(self, lines):
        """Convert an iterable of LyX source lines and return Markdown text.

        All per-document state is reset first, so one instance can convert
        several documents without their output running together.
        """
        self.lines = list(lines)
        self.output = []
        self.label_map = {}
        self.layout_stack = []
        self.current_text = ""
        self.current_section = ""
        self.enumerate_counter = 0
        self._layout_context = "unknown"

        # First pass: collect labels and map them to section titles
        self._collect_labels()

        # Second pass: convert. Inset handlers may advance self.pos themselves
        # to consume the lines that belong to their inset.
        self.pos = 0
        self.in_body = False
        while self.pos < len(self.lines):
            self._process_line(self.lines[self.pos].rstrip("\n"))
            self.pos += 1

        return "\n".join(self.output)

    def _first_text_line(self, start, stop):
        """Return the first content line in ``lines[start:stop]`` or ``None``.

        A content line is non-blank and starts with neither ``\\`` (a LyX
        directive) nor ``<`` (a tag).
        """
        for index in range(start, min(stop, len(self.lines))):
            candidate = self.lines[index].strip()
            if candidate and not candidate.startswith(("\\", "<")):
                return candidate
        return None

    def _collect_labels(self):
        """First pass: fill ``label_map`` with label name -> nearby title.

        Each ``name "..."`` line is associated with the most recent heading
        text, or with the most recent float caption if one came later.
        """
        current_heading = ""
        for i, raw in enumerate(self.lines):
            line = raw.strip()

            # Heading layouts: the title is looked for in the next 4 lines.
            if re.match(
                r"\\begin_layout (Chapter\*?|Section|Subsection|Subsubsection)",
                line,
            ):
                title = self._first_text_line(i + 1, i + 5)
                if title is not None:
                    current_heading = title

            # Label names
            if line.startswith('name "') and line.endswith('"'):
                label_name = line[6:-1]
                if label_name and current_heading:
                    self.label_map[label_name] = current_heading

            # Floats: use the caption text (searched within the next 49
            # lines) as the title for labels that follow.
            if line.startswith("\\begin_inset Float"):
                for j in range(i + 1, min(i + 50, len(self.lines))):
                    if self.lines[j].strip() == "\\begin_inset Caption":
                        caption = self._first_text_line(j + 1, j + 10)
                        if caption is not None:
                            current_heading = caption
                        break

    def _open_marker(self, marker):
        """Append an opening emphasis *marker*, spacing it from prior text."""
        if self.current_text and not self.current_text.endswith(
            self._NO_SPACE_BEFORE_MARKER
        ):
            self.current_text += " "
        self.current_text += marker

    def _close_marker(self, marker):
        """Append a closing emphasis *marker* followed by a soft space."""
        # A space directly before the closing marker would break the emphasis.
        if self.current_text.endswith(" "):
            self.current_text = self.current_text[:-1]
        self.current_text += marker + "\x01"

    def _process_line(self, line):
        """Dispatch one source line: structure, formatting directive or text."""
        stripped = line.strip()

        if stripped == "\\begin_body":
            self.in_body = True
            return
        if not self.in_body:
            return
        if stripped == "\\end_body":
            self.in_body = False
            return

        if stripped.startswith("\\begin_layout "):
            self._handle_layout_begin(stripped[len("\\begin_layout "):])
            return
        if stripped == "\\end_layout":
            self._handle_layout_end()
            return

        if stripped.startswith("\\begin_inset "):
            self._handle_inset_begin(stripped[len("\\begin_inset "):])
            return
        if stripped == "\\end_inset":
            # Most end_insets are no-ops for content accumulation
            return

        # Inset parameter lines that carry no content
        if stripped in ("status open", "status collapsed", "status inlined"):
            return
        if stripped.startswith(("wide ", "sideways ")):
            return

        # Font directives that map to Markdown emphasis markers
        for directive, (marker, opens, closes) in self._FONT_DIRECTIVES.items():
            if stripped.startswith(directive + " "):
                value = stripped.split()[1]
                if value in opens:
                    self._open_marker(marker)
                elif value in closes:
                    self._close_marker(marker)
                return

        # Backslash escape
        if stripped == "\\backslash":
            self.current_text += "\\"
            return

        if stripped in ("\\begin_deeper", "\\end_deeper"):
            return

        # Every other backslash directive (\align, \noindent, \size, header
        # settings, ...) is skipped silently. Unrecognised \begin*/\end*
        # lines are not skipped and fall through to the text handling below.
        if stripped.startswith("\\") and not stripped.startswith(
            ("\\begin", "\\end")
        ):
            return

        # Regular text content; lines starting with "<" are tags and skipped.
        if stripped and not stripped.startswith("<"):
            attaches = stripped[0] in self._ATTACHING_PUNCTUATION
            if self.current_text.endswith("\x01"):
                # Resolve the soft space: real space before words, nothing
                # before punctuation.
                self.current_text = self.current_text[:-1]
                if not attaches:
                    self.current_text += " "
            elif (
                self.current_text
                and not self.current_text.endswith(self._NO_SPACE_BEFORE_TEXT)
                and not attaches
            ):
                self.current_text += " "
            self.current_text += stripped

    def _handle_layout_begin(self, layout_type):
        """Open a layout: save the enclosing state and start fresh text."""
        self.layout_stack.append((self._layout_context, self.current_text))

        ctx = self._EXACT_LAYOUTS.get(layout_type)
        if ctx is None:
            if layout_type.startswith("Chapter"):
                ctx = "chapter"
            elif layout_type == "Plain" or layout_type.startswith("Plain "):
                ctx = "plain"
            else:
                ctx = "unknown"
        if ctx == "enumerate":
            self.enumerate_counter += 1

        self._layout_context = ctx
        self.current_text = ""

    def _handle_layout_end(self):
        """Close the open layout, emit its text and restore the parent."""
        text = self._clean_text(self.current_text)
        ctx = self._layout_context

        if self.layout_stack:
            parent_ctx, parent_text = self.layout_stack.pop()
        else:
            parent_ctx, parent_text = "unknown", ""

        if ctx == "title":
            self.output += [f"# {text}", ""]
        elif ctx == "author":
            self.output += [f"*{text}*", ""]
        elif ctx in self._HEADING_MARKS:
            self.output += [f"{self._HEADING_MARKS[ctx]} {text}", ""]
            self.current_section = text
            # Numbered lists restart after every heading.
            self.enumerate_counter = 0
        elif ctx == "enumerate":
            self.output.append(f"{self.enumerate_counter}. {text}")
        elif ctx == "itemize":
            self.output.append(f"- {text}")
        elif ctx in ("standard", "unknown"):
            if text.strip():
                self.output += [text, ""]
        elif ctx == "plain":
            # Plain layout inside insets - push text back to parent
            if text.strip():
                parent_text += " " + text

        self._layout_context = parent_ctx
        self.current_text = parent_text

    def _handle_inset_begin(self, inset_spec):
        """Route a ``\\begin_inset`` line to the handler for its inset type.

        Inset types not listed here are ignored; their content lines are then
        processed like any other line.
        """
        if inset_spec.startswith("Formula"):
            self._handle_formula(inset_spec)
        elif inset_spec.startswith("Quotes"):
            self._handle_quotes(inset_spec)
        elif inset_spec.startswith("Newline"):
            self.current_text += "  \n"
        elif inset_spec.startswith("Newpage"):
            pass
        elif inset_spec.startswith("CommandInset label"):
            self._handle_label()
        elif inset_spec.startswith("CommandInset ref"):
            self._handle_ref()
        elif inset_spec.startswith("Graphics"):
            self._handle_graphics()
        elif inset_spec.startswith("Float"):
            pass  # Float is just a container; nested layouts handle content
        elif inset_spec.startswith("Caption"):
            self.current_text += "\n\n*Caption:* "
        elif inset_spec.startswith("Tabular"):
            self._handle_tabular()
        elif inset_spec.startswith("Box"):
            self._handle_box()
        elif inset_spec.startswith("Text"):
            pass  # Text inset is a container
        elif inset_spec.startswith("ERT"):
            self._handle_ert()

    def _read_inset_lines(self):
        """Consume lines up to the next ``\\end_inset``.

        Returns the non-blank lines read (stripped), excluding the terminator.
        ``self.pos`` is left on the ``\\end_inset`` line, or on the last line
        of the document if no terminator is found.
        """
        collected = []
        while self.pos + 1 < len(self.lines):
            self.pos += 1
            nline = self.lines[self.pos].strip()
            if nline == "\\end_inset":
                break
            if nline:
                collected.append(nline)
        return collected

    def _handle_box(self):
        """Consume Box inset parameters (position, hor_pos, etc.)."""
        # Parameters run until a 'status ...' line; the box content itself is
        # handled by the nested layouts that follow.
        while self.pos + 1 < len(self.lines):
            self.pos += 1
            nline = self.lines[self.pos].strip()
            if nline.startswith("status "):
                return
            if nline.startswith("\\begin_layout") or nline == "\\end_inset":
                # Went one line too far - back up so the main loop sees it.
                self.pos -= 1
                return

    def _handle_formula(self, inset_spec):
        """Handle Formula insets - both inline and display math."""
        formula_start = inset_spec[len("Formula "):]
        body = self._read_inset_lines()

        if formula_start.startswith("$"):
            # Inline math: kept verbatim, separated from preceding text.
            content = formula_start.strip() + "".join(body)
            if self.current_text.endswith("\x01"):
                self.current_text = self.current_text[:-1] + " "
            elif self.current_text and not self.current_text.endswith(
                self._NO_SPACE_BEFORE_TEXT
            ):
                self.current_text += " "
            self.current_text += content

        elif formula_start.startswith("\\["):
            # Display math: \[...\] becomes a $$ block
            math_content = (formula_start.strip() + "".join(body)).strip()
            if math_content.startswith("\\["):
                math_content = math_content[2:]
            if math_content.endswith("\\]"):
                math_content = math_content[:-2]
            self.current_text += "\n\n$$\n" + math_content.strip() + "\n$$\n\n"

        elif formula_start.startswith("\\begin{"):
            # LaTeX environment: lines are kept separate, wrappers removed
            math_content = formula_start.strip() + "".join(
                "\n" + part for part in body
            )
            math_content = self._convert_latex_env(math_content)
            self.current_text += "\n\n$$\n" + math_content.strip() + "\n$$\n\n"

        else:
            # Unknown formula type: passed through unchanged
            self.current_text += formula_start + "".join(body)

    def _convert_latex_env(self, content):
        """Strip environment wrappers and ``\\label{...}`` from display math."""
        for pattern in self._LATEX_ENV_PATTERNS:
            content = re.sub(pattern, "", content)
        return content.strip()

    def _handle_quotes(self, inset_spec):
        """Append the quote character selected by the Quotes inset spec."""
        if "eld" in inset_spec:
            self.current_text += "\u201c"
        elif "erd" in inset_spec:
            self.current_text += "\u201d"
        elif "els" in inset_spec:
            self.current_text += "\u2018"
        elif "ers" in inset_spec:
            self.current_text += "\u2019"
        else:
            self.current_text += '"'
        self._read_inset_lines()  # discard the rest of the inset

    def _handle_label(self):
        """Handle CommandInset label - skip it."""
        self._read_inset_lines()

    def _handle_ref(self):
        """Replace a cross-reference with the title recorded for its label.

        References whose label is not in ``label_map`` produce no output.
        """
        ref_name = ""
        for nline in self._read_inset_lines():
            if nline.startswith("reference "):
                ref_name = nline.split('"')[1] if '"' in nline else ""

        if ref_name and ref_name in self.label_map:
            if self.current_text and not self.current_text.endswith(
                (" ", "\n", "(", "[")
            ):
                self.current_text += " "
            self.current_text += self.label_map[ref_name]

    def _handle_graphics(self):
        """Emit a Markdown image for a Graphics inset that names a file."""
        filename = ""
        for nline in self._read_inset_lines():
            if nline.startswith("filename "):
                filename = nline[len("filename "):]
        if filename:
            self.current_text += f"\n\n![{filename}]({filename})\n\n"

    def _handle_tabular(self):
        """Handle Tabular inset - parse lyxtabular format into a table."""
        rows = []
        current_row = []
        cell_text = ""
        in_cell = False
        depth = 1  # nesting level of insets, the Tabular itself being 1

        while self.pos + 1 < len(self.lines):
            self.pos += 1
            nline = self.lines[self.pos].strip()

            if nline == "\\end_inset":
                depth -= 1
                if depth <= 0:
                    break
                continue

            if nline.startswith("\\begin_inset "):
                inset_spec = nline[len("\\begin_inset "):]
                if inset_spec.startswith("Formula"):
                    # Consumed here including its \end_inset, so the nesting
                    # depth is unchanged.
                    formula_text = inset_spec[len("Formula "):]
                    body = self._read_inset_lines()
                    if formula_text.startswith("$"):
                        cell_text += formula_text.strip() + "".join(body)
                    else:
                        joined = formula_text + "".join(" " + p for p in body)
                        cell_text += joined.strip()
                elif inset_spec.startswith("ERT"):
                    # Skip ERT (usually \hline) inside tables; also consumed
                    # together with its \end_inset.
                    self._handle_ert()
                else:
                    depth += 1
                    if inset_spec.startswith("Text"):
                        in_cell = True
                        cell_text = ""
                continue

            if nline.startswith("<row"):
                current_row = []
            elif nline.startswith("</row"):
                if current_row:
                    rows.append(current_row)
                # Start a new list so the closing </lyxtabular> tag does not
                # append this row a second time.
                current_row = []
            elif nline.startswith("<cell"):
                cell_text = ""
                in_cell = True
            elif nline.startswith("</cell"):
                current_row.append(cell_text.strip())
                in_cell = False
            elif nline.startswith("</lyxtabular"):
                # Only a row that was never closed is still pending here.
                if current_row:
                    rows.append(current_row)
                break
            elif nline.startswith("<"):
                continue
            elif in_cell and nline and not nline.startswith("\\"):
                if cell_text and not cell_text.endswith(" "):
                    cell_text += " "
                cell_text += nline

        if rows:
            self._emit_markdown_table(rows)

    def _emit_markdown_table(self, rows):
        """Append *rows* to the current text as a Markdown pipe table.

        The first row becomes the header. Short rows are padded with empty
        cells, and unescaped ``|`` characters in cells are escaped so they
        are not read as column separators. *rows* itself is not modified.
        """
        if not rows:
            return
        num_cols = max(len(row) for row in rows)
        padded = [
            [re.sub(r"(?<!\\)\|", r"\\|", cell) for cell in row]
            + [""] * (num_cols - len(row))
            for row in rows
        ]
        table = ["| " + " | ".join(row) + " |" for row in padded]
        table.insert(1, "| " + " | ".join(["---"] * num_cols) + " |")
        self.current_text += "\n\n" + "\n".join(table) + "\n\n"

    def _handle_ert(self):
        """Handle ERT (Evil Red Text = raw LaTeX) insets - skip."""
        depth = 1
        while self.pos + 1 < len(self.lines):
            self.pos += 1
            nline = self.lines[self.pos].strip()
            if nline.startswith("\\begin_inset"):
                depth += 1
            if nline == "\\end_inset":
                depth -= 1
                if depth <= 0:
                    return

    def _clean_text(self, text):
        """Normalise spacing in the text accumulated for one layout."""
        # Remove any leftover soft space markers
        text = text.replace("\x01", " ").strip()
        # Collapse runs of spaces/tabs line by line so newlines survive
        text = "\n".join(re.sub(r"[ \t]+", " ", part) for part in text.split("\n"))
        # Drop the space before closing punctuation. NOTE(review): this is
        # applied to the whole text, including any math it contains.
        text = re.sub(r" ([.,;:!?)\]])", r"\1", text)
        # Drop the space after an opening paren/bracket
        text = re.sub(r"([\[(]) ", r"\1", text)
        # The removals above can leave two spaces side by side
        text = re.sub(r" +", " ", text)
        return text
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
def clean_markdown(text):
    """Post-process converted Markdown and return the cleaned text.

    Removes converter residue (``[?``/``?]`` comment markers, leaked
    ``LatexCommand label``/``LatexCommand ref`` lines, empty emphasis
    markers), puts a blank line before level-2+ headings, strips trailing
    whitespace and collapses runs of blank lines to a single blank line.
    The result has no leading or trailing blank lines and ends with exactly
    one newline.
    """
    # Clean up [?...?] comment markers -> just content
    text = text.replace("[?", "").replace("?]", "")

    # Remove spurious "LatexCommand label" or "LatexCommand ref" remnants,
    # parenthesised forms first so no empty "()" is left behind.
    for remnant in (
        "(LatexCommand label)",
        "(LatexCommand ref)",
        "LatexCommand label",
        "LatexCommand ref",
    ):
        text = text.replace(remnant, "")

    # Remove empty emphasis markers.
    # NOTE(review): "* *" also matches between two adjacent emphasised words
    # ("*a* *b*" becomes "*ab*"); kept as-is, confirm whether that is wanted.
    text = text.replace("** **", "")
    text = text.replace("* *", "")
    text = text.replace("****", "")

    # Ensure blank line before headings
    text = re.sub(r"([^\n])\n(#{2,} )", r"\1\n\n\2", text)

    # Ensure no trailing whitespace on lines
    text = "\n".join(line.rstrip() for line in text.split("\n"))

    # Collapse blank-line runs last: the steps above can empty a line or add
    # a newline, so collapsing any earlier would leave runs behind.
    text = re.sub(r"\n{3,}", "\n\n", text)

    return text.strip() + "\n"
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
def main():
    """Command-line entry point: convert a LyX file to Markdown.

    Usage: ``lyx2md.py <input.lyx> [output.md]``. When no output path is
    given, the input path with its extension replaced by ``.md`` is used.
    Prints a usage message and exits with status 1 if no input is given.
    """
    import os.path  # local import: only needed for the default output path

    if len(sys.argv) < 2:
        print("Usage: lyx2md.py <input.lyx> [output.md]")
        sys.exit(1)

    input_file = sys.argv[1]
    if len(sys.argv) > 2:
        output_file = sys.argv[2]
    else:
        # splitext only looks at the final path component, so a dot in a
        # directory name ("my.dir/file") is not mistaken for an extension.
        output_file = os.path.splitext(input_file)[0] + ".md"

    converter = LyxToMarkdown()
    result = clean_markdown(converter.convert(input_file))

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(result)

    print(f"Converted {input_file} -> {output_file}")
    print(f"Output: {len(result)} characters, {result.count(chr(10))} lines")


if __name__ == "__main__":
    main()
|