picorescue 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
picorescue/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """picorescue: inspect & recover LittleFS/FAT partitions from Pico flash dumps."""
2
+
3
+ __all__ = ["bidecl", "dump", "lfs", "fat", "carve"]
picorescue/bidecl.py ADDED
@@ -0,0 +1,289 @@
1
+ """Vendored Binary Info (bi_decl) parser, adapted from py_decl.
2
+
3
+ Originally from https://github.com/gadgetoid/py_decl (MIT). Trimmed to the
4
+ parser itself; the CLI/argparse harness is removed. Used here to locate
5
+ ``BlockDevice`` declarations (LittleFS / FAT partitions) inside a Pico binary.
6
+ """
7
+ import io
8
+ import struct
9
+ import sys
10
+
11
+ UF2_MAGIC_START0 = 0x0A324655 # "UF2\n"
12
+ UF2_MAGIC_START1 = 0x9E5D5157
13
+ UF2_MAGIC_END = 0x0AB16F30
14
+
15
+ FAMILY_ID_RP2040 = 0xE48BFF56
16
+ FAMILY_ID_PAD = 0xE48BFF57
17
+ FAMILY_ID_RP2350 = 0xE48BFF59
18
+
19
+ FLASH_START_ADDR = 0x10000000
20
+
21
+ BLOCK_SIZE = 512
22
+ DATA_SIZE = 256
23
+ HEADER_SIZE = 32
24
+ FOOTER_SIZE = 4
25
+
26
+ BI_MAGIC = b"\xf2\xeb\x88\x71"
27
+ BI_END = b"\x90\xa3\x1a\xe7"
28
+
29
+ GPIO_FUNCS = {
30
+ 0: "XIP", 1: "SPI", 2: "UART", 3: "I2C", 4: "PWM", 5: "SIO",
31
+ 6: "PIO0", 7: "PIO1", 8: "GPCK", 9: "USB", 0xF: "NULL",
32
+ }
33
+
34
+ TYPE_RAW_DATA = 1
35
+ TYPE_SIZED_DATA = 2
36
+ TYPE_LIST_ZERO_TERMINATED = 3
37
+ TYPE_BSON = 4
38
+ TYPE_ID_AND_INT = 5
39
+ TYPE_ID_AND_STRING = 6
40
+ TYPE_BLOCK_DEVICE = 7
41
+ TYPE_PINS_WITH_FUNC = 8
42
+ TYPE_PINS_WITH_NAME = 9
43
+ TYPE_NAMED_GROUP = 10
44
+
45
+ ID_PROGRAM_NAME = 0x02031C86
46
+ ID_PROGRAM_VERSION_STRING = 0x11A9BC3A
47
+ ID_PROGRAM_BUILD_DATE_STRING = 0x9DA22254
48
+ ID_BINARY_END = 0x68F465DE
49
+ ID_PROGRAM_URL = 0x1856239A
50
+ ID_PROGRAM_DESCRIPTION = 0xB6A07C19
51
+ ID_PROGRAM_FEATURE = 0xA1F4B453
52
+ ID_PROGRAM_BUILD_ATTRIBUTE = 0x4275F0D3
53
+ ID_SDK_VERSION = 0x5360B3AB
54
+ ID_PICO_BOARD = 0xB63CFFBB
55
+ ID_BOOT2_NAME = 0x7F8882E1
56
+ ID_FILESYSTEM = 0x1009BE7E
57
+
58
+ IDS = {
59
+ ID_PROGRAM_NAME: "Program Name",
60
+ ID_PROGRAM_VERSION_STRING: "Program Version",
61
+ ID_PROGRAM_BUILD_DATE_STRING: "Build Date",
62
+ ID_BINARY_END: "Binary End Address",
63
+ ID_PROGRAM_URL: "Program URL",
64
+ ID_PROGRAM_DESCRIPTION: "Program Description",
65
+ ID_PROGRAM_FEATURE: "Program Feature",
66
+ ID_PROGRAM_BUILD_ATTRIBUTE: "Program Build Attribute",
67
+ ID_SDK_VERSION: "SDK Version",
68
+ ID_PICO_BOARD: "Pico Board",
69
+ ID_BOOT2_NAME: "Boot Stage 2 Name",
70
+ }
71
+
72
+ TYPES = {
73
+ TYPE_RAW_DATA: "Raw Data",
74
+ TYPE_SIZED_DATA: "Sized Data",
75
+ TYPE_LIST_ZERO_TERMINATED: "Zero Terminated List",
76
+ TYPE_BSON: "BSON",
77
+ TYPE_ID_AND_INT: "ID & Int",
78
+ TYPE_ID_AND_STRING: "ID & Str",
79
+ TYPE_BLOCK_DEVICE: "Block Device",
80
+ TYPE_PINS_WITH_FUNC: "Pins With Func",
81
+ TYPE_PINS_WITH_NAME: "Pins With Name",
82
+ TYPE_NAMED_GROUP: "Named Group",
83
+ }
84
+
85
+ # Block device permission / partition-table flags.
86
+ BLOCK_DEV_FLAG_READ = 1 << 0
87
+ BLOCK_DEV_FLAG_WRITE = 1 << 1
88
+ BLOCK_DEV_FLAG_REFORMAT = 1 << 2
89
+
90
+ ALWAYS_A_LIST = ("NamedGroup", "BlockDevice", "ProgramFeature")
91
+
92
+
93
class UF2Reader(io.BytesIO):
    """Flatten the first RP2040/RP2350 family section of a UF2 into a BytesIO.

    NOTE: this concatenates block data and is only suitable for the contiguous
    firmware region (which is what bi_decl parsing needs). For correctly
    *addressed* flash images use :func:`picorescue.dump.load_image`.
    """

    def __init__(self, filepath):
        """Load ``filepath`` and expose the first matching section's payload.

        The stream is left empty when no section carries the RP2040 or RP2350
        family ID.
        """
        bin_data = b""
        sections = self.uf2_to_bin(filepath)
        try:
            for _, _, family_id, _, _, block_data in sections:
                if family_id in (FAMILY_ID_RP2040, FAMILY_ID_RP2350):
                    bin_data = block_data
                    break
        finally:
            # Closing the generator runs its ``with open(...)`` cleanup now
            # instead of whenever the suspended generator gets collected.
            sections.close()
        super().__init__(bin_data)

    @staticmethod
    def _unpack_header(data):
        """Return the eight header words of a full UF2 block, or None.

        None is returned when either start magic or the end magic does not
        match, so callers can skip blocks that are not UF2 blocks.
        """
        fields = struct.unpack("<IIIIIIII", data[:HEADER_SIZE])
        (magic_end,) = struct.unpack(
            "<I", data[BLOCK_SIZE - FOOTER_SIZE:BLOCK_SIZE]
        )
        if (
            fields[0] != UF2_MAGIC_START0
            or fields[1] != UF2_MAGIC_START1
            or magic_end != UF2_MAGIC_END
        ):
            return None
        return fields

    def uf2_to_bin(self, filepath):
        """Yield one tuple per section found in the UF2 file at ``filepath``.

        A section starts at a block whose block number is 0. Each tuple is
        ``(section_index, addr, family_id, payload_size, num_blocks, data)``
        where ``addr``, ``family_id``, ``payload_size`` and ``num_blocks`` are
        the header fields of the section's first block and ``data`` is the
        concatenated payload of all blocks in the section.
        """
        with open(filepath, "rb") as file:
            section_index = 0
            while True:
                data = file.read(BLOCK_SIZE)
                if len(data) < BLOCK_SIZE:
                    break  # EOF, or a truncated trailing block
                header = self._unpack_header(data)
                if header is None:
                    continue  # not a UF2 block: ignore it
                _, _, _, addr, payload_size, block_no, num_blocks, family_id = header
                if block_no != 0:
                    continue
                # Rewind so uf2_section_data() re-reads this first block.
                file.seek(file.tell() - BLOCK_SIZE)
                yield (
                    section_index, addr, family_id, payload_size, num_blocks,
                    b"".join(self.uf2_section_data(file)),
                )
                section_index += 1

    def uf2_section_data(self, file):
        """Yield the payload of each block from ``file`` until the next section.

        Reading stops at EOF, at a truncated block, or at the second block
        numbered 0 (which is pushed back for the caller to read again).
        """
        max_payload = BLOCK_SIZE - HEADER_SIZE - FOOTER_SIZE
        count = 0
        while True:
            data = file.read(BLOCK_SIZE)
            if len(data) < BLOCK_SIZE:
                break
            header = self._unpack_header(data)
            if header is None:
                continue
            payload_size, block_no = header[4], header[5]
            if block_no == 0 and count > 0:
                file.seek(file.tell() - BLOCK_SIZE)
                break
            # Honour the per-block payload size rather than assuming
            # DATA_SIZE; clamp it so a corrupt header cannot reach into the
            # footer.
            payload_size = min(payload_size, max_payload)
            yield data[HEADER_SIZE:HEADER_SIZE + payload_size]
            count += 1
+
137
+
138
class PyDecl:
    """Parse Binary Info (``bi_decl``) entries out of a flat firmware image.

    ``file`` is a seekable binary stream in which offset 0 corresponds to
    ``FLASH_START_ADDR``; every address found in the image is translated with
    :meth:`addr_to_bin_offset` before seeking.
    """

    def __init__(self, file, debug=False):
        """Store the stream and build the entry-type -> parser dispatch table.

        When ``debug`` is true, entries that cannot be parsed are reported on
        stderr.
        """
        self.entry_parsers = {
            TYPE_ID_AND_INT: self._parse_type_id_and_int,
            TYPE_ID_AND_STRING: self._parse_type_id_and_str,
            TYPE_BLOCK_DEVICE: self._parse_block_device,
            TYPE_NAMED_GROUP: self._parse_named_group,
            TYPE_PINS_WITH_FUNC: self._parse_pins_with_func,
            TYPE_PINS_WITH_NAME: self._parse_pins_with_name,
        }
        self.file = file
        self.debug = debug

    def parse(self):
        """Return a dict of all recognised entries, or None if none are found.

        None is returned when the start/end markers are missing, when the
        header between them is not exactly 12 bytes, or when the entry table
        it describes is out of range, misaligned or truncated.

        Keys are type names (for known IDs), raw integer IDs, or the fixed
        names ``"BlockDevice"``, ``"NamedGroup"`` and ``"Pins"``. Repeated
        keys are collected into lists, except ``"Pins"`` which is merged into
        a single ``{pin: {...}}`` mapping.
        """
        self.file.seek(0)
        if self.read_until(BI_MAGIC) is None:
            return None
        data = self.read_until(BI_END)
        if data is None or len(data) != 12:
            return None
        # Explicit little-endian, matching every other unpack in this class.
        entries_start, entries_end, _ = struct.unpack("<III", data)
        entries_start = self.addr_to_bin_offset(entries_start)
        entries_end = self.addr_to_bin_offset(entries_end)
        entries_bytes_len = entries_end - entries_start
        if entries_start < 0 or entries_bytes_len < 0 or entries_bytes_len % 4:
            return None

        self.file.seek(entries_start)
        data = self.file.read(entries_bytes_len)
        if len(data) != entries_bytes_len:
            return None
        entries = struct.unpack(f"<{entries_bytes_len // 4}I", data)

        parsed = {}
        for entry in entries:
            offset = self.addr_to_bin_offset(entry)
            if offset < 0:
                continue  # pointer below the start of the image
            self.file.seek(offset)
            parsed_entry = self.parse_entry()
            if parsed_entry is None:
                continue
            k, v = parsed_entry
            if k not in parsed:
                parsed[k] = [v] if k in ALWAYS_A_LIST else v
            elif k == "Pins":
                # Merge per pin so a pin's function and name can coexist
                # instead of the later entry replacing the earlier one.
                for pin, info in v.items():
                    parsed[k].setdefault(pin, {}).update(info)
            elif isinstance(parsed[k], list):
                parsed[k].append(v)
            else:
                parsed[k] = [parsed[k], v]

        # Attach any entries keyed by a group's ID to that group.
        for group in parsed.get("NamedGroup", ()):
            if group["id"] in parsed:
                group["data"] = parsed.pop(group["id"])
        return parsed

    def addr_to_bin_offset(self, addr):
        """Translate a flash address into an offset within ``self.file``."""
        return addr - FLASH_START_ADDR

    def data_type_to_str(self, data_type):
        """Return the display name of an entry type, or ``"Unknown"``."""
        return TYPES.get(data_type, "Unknown")

    def data_id_to_str(self, data_id):
        """Return the display name of a data ID, or ``"Unknown"``."""
        return IDS.get(data_id, "Unknown")

    def is_valid_data_id(self, data_id):
        """Return True when ``data_id`` has an entry in ``IDS``."""
        return data_id in IDS

    def data_id_to_typename(self, data_id):
        """Return the display name of ``data_id`` with spaces removed."""
        return self.data_id_to_str(data_id).replace(" ", "")

    def _read_until(self, delimiter=b"\x00"):
        """Yield delimiter-sized chunks from the stream until ``delimiter``.

        The stream is read in steps of ``len(delimiter)``, so the delimiter is
        only recognised at multiples of its length from the starting
        position. Raises EOFError if the stream ends first.
        """
        while (chunk := self.file.read(len(delimiter))) != delimiter:
            if len(chunk) == 0:
                raise EOFError
            yield chunk

    def read_until(self, delimiter=b"\x00"):
        """Return the bytes before ``delimiter``, or None if EOF comes first."""
        try:
            return b"".join(self._read_until(delimiter))
        except EOFError:
            return None

    def lookup_string(self, address):
        """Return the NUL-terminated string stored at flash ``address``.

        Invalid UTF-8 is replaced rather than raising. An address below the
        start of the image, or a string with no terminator before EOF, yields
        an empty string.
        """
        offset = self.addr_to_bin_offset(address)
        if offset < 0:
            return ""
        self.file.seek(offset)
        raw = self.read_until(delimiter=b"\x00")
        if raw is None:
            return ""
        return raw.decode("utf-8", "replace")

    def _parse_type_id_and_int(self, tag):
        """Parse an (id, int) entry; the key is the type name for known IDs."""
        data_id, data_value = struct.unpack("<II", self.file.read(8))
        if self.is_valid_data_id(data_id):
            return self.data_id_to_typename(data_id), data_value
        return data_id, data_value

    def _parse_type_id_and_str(self, tag):
        """Parse an (id, string address) entry and resolve the string."""
        data_id, str_addr = struct.unpack("<II", self.file.read(8))
        data_value = self.lookup_string(str_addr)
        if self.is_valid_data_id(data_id):
            return self.data_id_to_typename(data_id), data_value
        return data_id, data_value

    def _parse_block_device(self, tag):
        """Parse a block-device entry into name, address, size and flags.

        The fourth 32-bit field (a further-info address) is read but ignored.
        """
        name_addr, start_addr, size, _more_info_addr, flags = struct.unpack(
            "<IIIIH", self.file.read(18)
        )
        name = self.lookup_string(name_addr)
        return "BlockDevice", {
            "name": name, "address": start_addr, "size": size, "flags": flags,
        }

    def _parse_named_group(self, tag):
        """Parse a named-group entry and resolve its label string."""
        parent_id, flags, group_tag, group_id, label_addr = struct.unpack(
            "<IHHII", self.file.read(16)
        )
        label = self.lookup_string(label_addr)
        return "NamedGroup", {
            "label": label, "parent": parent_id, "flags": flags,
            "tag": group_tag, "id": group_id,
        }

    def _parse_pins_with_func(self, tag):
        """Parse a packed pins-with-function word into ``{pin: {"function"}}``.

        Bits 0-2 select the encoding, bits 3-6 the GPIO function, and the
        remaining bits hold 5-bit pin numbers. Unknown encodings yield no
        pins; unknown function numbers map to None.
        """
        pin_encoding = struct.unpack("<I", self.file.read(4))[0]
        encoding_type = pin_encoding & 0b111
        func = (pin_encoding & 0b1111000) >> 3
        func_name = GPIO_FUNCS.get(func)
        pin_encoding >>= 7
        pins = []
        # NOTE(review): the pico-sdk headers appear to define the RANGE
        # encoding as 1 and MULTI as 2, which would be the reverse of the two
        # branches below - confirm against the SDK before changing anything.
        if encoding_type == 0b001:
            for _ in range(5):
                pins.append(pin_encoding & 0b11111)
                pin_encoding >>= 5
        elif encoding_type == 0b010:
            pin_end = pin_encoding & 0b11111
            pin_start = (pin_encoding >> 5) & 0b11111
            pins = list(range(pin_start, pin_end + 1))
        return "Pins", {pin: {"function": func_name} for pin in pins}

    def _parse_pins_with_name(self, tag):
        """Parse a (pin mask, name address) entry into ``{pin: {"name"}}``.

        Only the lowest set bit of the mask is used as the pin number. An
        empty mask yields no pins.
        """
        pin_mask, name_addr = struct.unpack("<II", self.file.read(8))
        name = self.lookup_string(name_addr)
        if pin_mask == 0:
            return "Pins", {}
        # Isolate the lowest set bit and convert it to a bit index.
        pin_no = (pin_mask & -pin_mask).bit_length() - 1
        return "Pins", {pin_no: {"name": name}}

    def parse_entry(self, include_tags=("RP", "MP")):
        """Parse the entry at the current stream position.

        Returns a ``(key, value)`` pair, or None when the entry's tag is not
        in ``include_tags``, its type has no parser, or the entry is cut
        short by the end of the stream.
        """
        header = self.file.read(4)
        if len(header) < 4:
            return None
        data_type, tag = struct.unpack("<H2s", header)
        if tag.decode("utf-8", "replace") not in include_tags:
            return None
        # Look the parser up outside the call so that a KeyError raised
        # *inside* a parser is never mistaken for "no parser".
        parser = self.entry_parsers.get(data_type)
        if parser is None:
            if self.debug:
                sys.stderr.write(
                    f"ERROR: No parser for: {self.data_type_to_str(data_type)}\n"
                )
            return None
        try:
            return parser(tag)
        except struct.error:
            # The entry body ran past the end of the image.
            if self.debug:
                sys.stderr.write(
                    f"ERROR: Truncated entry: {self.data_type_to_str(data_type)}\n"
                )
            return None
+ return None
picorescue/carve.py ADDED
@@ -0,0 +1,115 @@
1
+ """Carve Python source out of raw flash regions.
2
+
3
+ Filesystem-agnostic recovery: most rescue targets are small UTF-8 text files
4
+ (MicroPython scripts). When a file is deleted or its directory entry is gone,
5
+ the *content* usually still sits in flash until that block is erased and
6
+ rewritten. We scan for printable text runs and score them for "Python-ness".
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import re
12
+ from dataclasses import dataclass
13
+
14
+ # Bytes we treat as "text": printable ASCII + tab/newline/carriage-return.
15
+ _TEXT = bytes(range(0x20, 0x7F)) + b"\t\n\r"
16
+ _TEXT_SET = set(_TEXT)
17
+
18
+ # Strong Python/MicroPython signals.
19
+ _KEYWORDS = re.compile(
20
+ rb"\b(?:import|from|def|class|return|self|print|lambda|async|await|"
21
+ rb"machine|micropython|const|while|for|if|elif|else|try|except|with|"
22
+ rb"yield|raise|global|nonlocal)\b"
23
+ )
24
+ _ASSIGN = re.compile(rb"^\s*[A-Za-z_][A-Za-z0-9_]*\s*=", re.MULTILINE)
25
+ _DEF_OR_IMPORT = re.compile(rb"^\s*(?:def |class |import |from )", re.MULTILINE)
26
+
27
+
28
@dataclass
class Candidate:
    """A carved text run together with its score and content hash."""

    offset: int  # offset within the scanned region
    data: bytes
    score: float
    sha1: str

    @property
    def text(self) -> str:
        """The raw bytes decoded as UTF-8, with invalid sequences replaced."""
        return str(self.data, "utf-8", "replace")

    @property
    def line_count(self) -> int:
        """Number of lines, counted as newline bytes plus one."""
        return 1 + self.data.count(b"\n")

    def suggested_name(self) -> str:
        """Guess a filename from the first 512 bytes of the data.

        Preference order: a ``# something.py`` comment line (slashes become
        underscores), then the first ``class`` name, then the first ``def``
        name; ``"unknown.py"`` when none of these is present.
        """
        prefix = self.data[:512]
        header = re.search(rb"^#\s*([\w.\-/]+\.py)\b", prefix, re.MULTILINE)
        if header is not None:
            return header.group(1).decode("ascii", "replace").replace("/", "_")
        # A class anywhere in the prefix wins over a def, regardless of order.
        for pattern in (
            rb"^\s*class\s+([A-Za-z_]\w*)",
            rb"^\s*def\s+([A-Za-z_]\w*)",
        ):
            found = re.search(pattern, prefix, re.MULTILINE)
            if found is not None:
                return found.group(1).decode("ascii", "replace") + ".py"
        return "unknown.py"
56
+
57
+
58
def _text_runs(data: bytes, min_len: int):
    """Yield ``(offset, run)`` for each maximal run of text bytes.

    A run is a stretch of consecutive bytes that are all in ``_TEXT_SET``;
    only runs at least ``min_len`` bytes long are yielded.
    """
    run_start = -1  # -1 means "not currently inside a run"
    for pos, byte in enumerate(data):
        is_text = byte in _TEXT_SET
        if is_text and run_start < 0:
            run_start = pos
        elif not is_text and run_start >= 0:
            if pos - run_start >= min_len:
                yield run_start, data[run_start:pos]
            run_start = -1
    # A run that is still open at the end of the buffer.
    if run_start >= 0 and len(data) - run_start >= min_len:
        yield run_start, data[run_start:]
71
+
72
+
73
def score(run: bytes) -> float:
    """Return a heuristic score for how much ``run`` looks like Python source.

    Empty input scores 0.0. Otherwise the score is a weighted count of
    keyword, def/class/import-line and assignment-line matches, plus bonuses
    for having at least three lines and for any indented line, minus a
    penalty for a newline-free run longer than 200 bytes. The result is not
    normalised by length and can be negative.
    """
    if not run:
        return 0.0
    total = (
        2.0 * len(_KEYWORDS.findall(run))
        + 4.0 * len(_DEF_OR_IMPORT.findall(run))
        + 1.0 * len(_ASSIGN.findall(run))
    )
    pieces = run.split(b"\n")
    # Multi-line, indented structure is rewarded.
    if len(pieces) >= 3:
        total += 2.0
    for piece in pieces:
        if piece.startswith((b" ", b"\t")):
            total += 2.0
            break
    # A single long line with no newline at all is penalised.
    if len(pieces) == 1 and len(run) > 200:
        total -= 3.0
    return total
93
+
94
+
95
def carve(data: bytes, min_len: int = 40, min_score: float = 6.0,
          known_hashes: set[str] | None = None) -> list[Candidate]:
    """Return scored Python-source candidates found in ``data``.

    Text runs of at least ``min_len`` bytes are scored on their untrimmed
    content; runs scoring below ``min_score`` are dropped. Surviving runs are
    stripped of surrounding NULs and whitespace and de-duplicated by the SHA-1
    of the stripped bytes. Anything whose hash is in ``known_hashes`` is
    skipped as well. Candidates are returned highest score first, and each
    candidate's ``offset`` is the position of its (stripped) ``data`` within
    ``data``.
    """
    known_hashes = known_hashes or set()
    out = []
    seen: set[str] = set()
    for offset, run in _text_runs(data, min_len):
        sc = score(run)
        if sc < min_score:
            continue
        # Strip the leading side first so the number of dropped bytes is
        # known and the reported offset can point at the data actually kept.
        left_trimmed = run.lstrip(b"\x00").lstrip()
        trimmed = left_trimmed.rstrip(b"\x00").rstrip()
        if not trimmed:
            continue
        sha1 = hashlib.sha1(trimmed).hexdigest()
        if sha1 in known_hashes or sha1 in seen:
            continue
        seen.add(sha1)
        start = offset + (len(run) - len(left_trimmed))
        out.append(Candidate(offset=start, data=trimmed, score=sc, sha1=sha1))
    out.sort(key=lambda c: c.score, reverse=True)
    return out