oswatcher-plugins 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
1
+ """Repository layer for symbols plugin Neo4j operations."""
2
+
3
+ from pathlib import PurePath
4
+ from typing import Dict, List, Tuple
5
+
6
+ from neogit.service.neogit import cypher_query_with_backoff
7
+
8
+
9
class SymbolsRepository:
    """Neo4j persistence layer for the symbols plugin.

    Reads go through ``self.neogit.db.cypher_query``; writes go through the
    module-level ``cypher_query_with_backoff`` helper.
    """

    def __init__(self, neogit):
        """Store the neogit service used for database access.

        Args:
            neogit: Neogit service instance; its ``db.cypher_query`` is used
                by :meth:`query_pe_blobs`.
        """
        self.neogit = neogit

    def query_pe_blobs(self, root_hash: str, mime_type: str) -> List[Tuple[PurePath, str]]:
        """Find blobs under a root tree that carry the given MIME type.

        Args:
            root_hash: Hash of the root Tree node
            mime_type: MIME type to filter by

        Returns:
            List of (file_path, blob_hash) tuples, where file_path is built
            from the ``name`` of each relationship on the path from the root.
        """
        query = """
        MATCH path = (r:Tree {hash: $root_hash})-[:HAS_CHILD_TREE|HAS_CHILD_BLOB*]->(b:Blob)
        WHERE EXISTS {
        MATCH (b)-[:HAS_MIME_TYPE]->(m:MimeType)
        WHERE m.mime = $mime_type
        }
        RETURN [rel IN relationships(path) | rel.name] AS parts, b.hash
        """
        rows, _ = self.neogit.db.cypher_query(query, {"mime_type": mime_type, "root_hash": root_hash})
        # Each row is (list of path components, blob hash).
        return [(PurePath(*parts), blob_hash) for parts, blob_hash in rows]

    def insert_symbols(self, blob_hash: str, param_list: List[Dict]) -> None:
        """Merge Symbol nodes and link them to a blob.

        Args:
            blob_hash: Hash of the PE file blob
            param_list: List of dicts with hash, address, and symbol name key
                (`sym_name` or `name`; `sym_name` wins when both are present)
        """
        query = """
        MATCH (b:Blob {hash: $blob_hash})
        WITH b
        UNWIND $unwind as p
        MERGE (s:Symbol {hash: p.hash, address: p.address})
        MERGE (b)-[:HAS_SYMBOL {name: coalesce(p.sym_name, p.name)}]->(s)
        """
        cypher_query_with_backoff(query, {"blob_hash": blob_hash, "unwind": param_list})

    def insert_struct(self, blob_hash: str, struct_node, unwind_param: List[Dict]) -> None:
        """Merge a struct, its fields, and the blob-to-struct relationship.

        Args:
            blob_hash: Hash of the PE file blob
            struct_node: StructMerkleNode with hash, size, kind, name
            unwind_param: List of field dicts with hash, name, offset, data_type
        """
        query = """
        MERGE (s:Struct {hash: $hash, size: $size, kind: $kind})
        WITH s
        UNWIND $unwind_param as x
        MERGE (f:StructField {hash: x.hash, offset: x.offset, data_type: x.data_type})
        MERGE (s)-[:HAS_FIELD {name: x.name}]->(f)
        WITH s
        MATCH (b:Blob {hash: $blob_hash})
        WITH b, s
        MERGE (b)-[:HAS_STRUCT {name: $name}]->(s)
        """
        cypher_query_with_backoff(
            query,
            {
                "blob_hash": blob_hash,
                "unwind_param": unwind_param,
                "hash": struct_node.hash,
                "name": struct_node.name,
                "size": struct_node.size,
                # The enum member name is stored, not the enum object.
                "kind": struct_node.kind.name,
            },
        )

    def insert_data_type(self, node) -> None:
        """Merge a data type node and its direct children.

        The parent and each child are merged on ``hash``; every child is then
        linked to the parent with a ``HAS_DATA_TYPE`` relationship.

        Args:
            node: DataTypeMerkleNode with type metadata; ``node.children`` is
                read as a mapping whose values are child nodes.
        """
        # NOTE(review): the ON CREATE and ON MATCH branches for the parent are
        # identical, and `CASE WHEN x IS NOT NULL THEN x END` evaluates to x
        # either way; kept verbatim to avoid touching the stored data.
        query = """
        MERGE (d:DataType {hash: $hash}) // Ensure 'hash' uniquely identifies 'DataType'
        ON CREATE SET
        d.type = CASE WHEN $type IS NOT NULL THEN $type END,
        d.name = CASE WHEN $name IS NOT NULL THEN $name END,
        d.array_counter = CASE WHEN $array_counter IS NOT NULL THEN $array_counter END,
        d.bit_position = CASE WHEN $bit_position IS NOT NULL THEN $bit_position END,
        d.bit_length = CASE WHEN $bit_length IS NOT NULL THEN $bit_length END
        ON MATCH SET
        d.type = CASE WHEN $type IS NOT NULL THEN $type END,
        d.name = CASE WHEN $name IS NOT NULL THEN $name END,
        d.array_counter = CASE WHEN $array_counter IS NOT NULL THEN $array_counter END,
        d.bit_position = CASE WHEN $bit_position IS NOT NULL THEN $bit_position END,
        d.bit_length = CASE WHEN $bit_length IS NOT NULL THEN $bit_length END
        WITH d
        UNWIND $children AS child
        MERGE (c:DataType {hash: child.hash}) // Assuming 'hash' is unique for child nodes too
        ON CREATE SET
        c.type = CASE WHEN child.type IS NOT NULL THEN child.type END,
        c.name = CASE WHEN child.name IS NOT NULL THEN child.name END,
        c.array_counter = CASE WHEN child.array_counter IS NOT NULL THEN child.array_counter END,
        c.bit_position = CASE WHEN child.bit_position IS NOT NULL THEN child.bit_position END,
        c.bit_length = CASE WHEN child.bit_length IS NOT NULL THEN child.bit_length END
        MERGE (d)-[:HAS_DATA_TYPE]->(c)
        """
        # Only the child nodes are needed; iterating values() avoids an unused
        # key variable that previously shadowed the builtin `hash`.
        children = [
            {
                "hash": child.hash,
                "type": child.kind.name,
                "name": child.name,
                "array_counter": child.array_counter,
                "bit_position": child.bit_position,
                "bit_length": child.bit_length,
            }
            for child in node.children.values()
        ]
        cypher_query_with_backoff(
            query,
            {
                "hash": node.hash,
                "type": node.kind.name,
                "name": node.name,
                "array_counter": node.array_counter,
                "bit_position": node.bit_position,
                "bit_length": node.bit_length,
                "children": children,
            },
        )
@@ -0,0 +1,47 @@
1
+ """Pure functions for symbols plugin business logic."""
2
+
3
+ import hashlib
4
+ from pathlib import PurePath
5
+ from typing import Dict, List, Tuple
6
+
7
+
8
def filter_valid_filenames(
    blob_results: List[Tuple[PurePath, str]], allowed_filenames: List[str]
) -> List[Tuple[PurePath, str]]:
    """Keep only the blob results whose file name is in the allowed set.

    This is a pure function with no side effects. Input order is preserved.

    Args:
        blob_results: List of (path, blob_hash) tuples
        allowed_filenames: Allowed file names (compared against ``path.name``);
            any iterable of strings is accepted

    Returns:
        Filtered list of (path, blob_hash) tuples
    """
    # Build the lookup once: O(1) membership per blob instead of scanning the
    # list each time, and safe even if a one-shot iterable is passed in.
    allowed = frozenset(allowed_filenames)
    return [(path, blob_hash) for path, blob_hash in blob_results if path.name in allowed]
23
+
24
+
25
def parse_symbols_from_json(symbols_dict: Dict) -> List[Dict[str, str]]:
    """Turn a PDB symbol mapping into a list of insertable symbol records.

    This is a pure function with no side effects.

    Symbols whose name starts with '?' or '$' (mangled / compiler-generated)
    are dropped. The remaining symbols are emitted in sorted name order.

    Args:
        symbols_dict: Dictionary mapping symbol names to symbol data; each
            value must provide an "address" entry

    Returns:
        List of dicts with "name", "address" (stringified) and "hash"
        (SHA-1 hex digest of the address string) fields
    """
    skipped_prefixes = ("?", "$")
    records = []
    for symbol_name, symbol_data in sorted(symbols_dict.items()):
        if symbol_name.startswith(skipped_prefixes):
            continue

        address_text = str(symbol_data["address"])
        digest = hashlib.sha1(address_text.encode()).hexdigest()
        records.append({"name": symbol_name, "address": address_text, "hash": digest})

    return records
@@ -0,0 +1,180 @@
1
+ """Linux syscall extraction plugin using kernel filesystem analysis and Git repository."""
2
+
3
+ from typing import Dict, List
4
+
5
+ import appdirs
6
+ from attrs import define
7
+ from neogit.model.neo import Commit
8
+
9
+ from plugins.syscalls.exceptions import KernelVersionNotFoundError, PreKernel2011Error, SyscallFileNotFoundError
10
+ from plugins.syscalls.filesystem import KernelInfo, find_kernel_versions, get_boot_directory
11
+ from plugins.syscalls.kernel_repo_manager import ensure_kernel_repo, get_syscall_files
12
+ from plugins.syscalls.nodes import SyscallsMerkleVisitor, SyscallTableNode
13
+ from plugins.syscalls.syscall_table_parser import parse_syscall_table_line
14
+ from plugins.syscalls.syscalls_h_parser import parse_syscall_signature
15
+ from plugins.types import AbstractPlugin
16
+
17
+
18
@define(auto_attribs=True)
class SyscallsPlugin(AbstractPlugin):
    """Extract Linux syscall information for the kernels found in a commit.

    Pipeline implemented by :meth:`run`:
    1. Take the first filesystem tree of the commit and locate /boot
    2. Find vmlinuz kernel files and derive their versions
    3. Read the syscall table and header for each version from a cached
       Linux kernel Git repository
    4. Build SyscallTableNodes and run them through SyscallsMerkleVisitor
    """

    def run(self, commit: Commit):
        """Execute syscall extraction for a commit.

        Returns early (after logging) when the commit has no filesystem, no
        /boot directory, or no recognisable kernel files.

        Args:
            commit: The Commit node to analyze
        """
        self.logger.info(f"Running syscall plugin for commit {commit.hash}")

        # Root filesystem tree: the first entry of commit.filesystem.
        try:
            root = commit.filesystem[0]
        except IndexError:
            self.logger.warning(f"No filesystem found for commit {commit.hash}")
            return

        boot = get_boot_directory(root)
        if not boot:
            self.logger.info("No /boot directory found, skipping syscall extraction")
            return

        kernels = find_kernel_versions(boot, self)
        if not kernels:
            self.logger.info("No kernel files found in /boot")
            return

        kernel_summary = ", ".join(f"{k.filename} ({k.architecture})" for k in kernels)
        self.logger.info(f"Found {len(kernels)} kernel(s): " + kernel_summary)

        # Pull syscall data from the kernel repository, then hash it as nodes.
        per_version = self._extract_syscalls_from_repo(kernels)
        self._transform_and_visit(kernels, per_version)

        self.logger.info(f"Syscall extraction complete for commit {commit.hash}")

    def _extract_syscalls_from_repo(self, kernel_info_list: List[KernelInfo]) -> Dict[str, List[dict]]:
        """Collect syscall data for every kernel from the Linux Git repository.

        Failures for one version are logged and do not stop the others.

        Args:
            kernel_info_list: List of kernel information to extract

        Returns:
            Dictionary mapping kernel version to list of syscall data; versions
            that failed or yielded no syscalls are absent
        """
        cache_dir = appdirs.user_cache_dir("oswatcher-plugins")
        self.logger.info(f"Cloning/updating Linux kernel repository to {cache_dir}")
        repo = ensure_kernel_repo(cache_dir)

        collected = {}
        for info in kernel_info_list:
            version = info.version
            try:
                found = self._extract_version_syscalls(repo, version)
                if found:
                    collected[version] = found
                    self.logger.info(f"Extracted {len(found)} syscalls for {version} ({info.architecture})")
            except PreKernel2011Error as err:
                self.logger.warning(f"Skipping {version}: {err}")
            except KernelVersionNotFoundError as err:
                self.logger.warning(f"Version {version} not found in repository: {err}")  # noqa: E713
            except SyscallFileNotFoundError as err:
                self.logger.warning(f"Syscall files not found for {version}: {err}")
            except Exception as err:
                # Best-effort boundary: any other failure is logged, not raised.
                self.logger.error(f"Failed to extract syscalls for {version}: {err}")

        return collected

    def _extract_version_syscalls(self, repo, version: str) -> List[dict]:
        """Build the syscall list for one kernel version.

        Args:
            repo: Git repository object
            version: Kernel version like 'v5.15'

        Returns:
            List of dicts with "name", "index", "entry_point" and "parameters"
            ("parameters" is None when no signature was found in the header)
        """
        table_content, header_content = get_syscall_files(repo, version)

        result = []
        for table_line in table_content.splitlines():
            parsed = parse_syscall_table_line(table_line)
            if not parsed:
                # Line did not yield a syscall entry.
                continue

            # Signatures are looked up in the header under the sys_<name> symbol.
            entry_point = f"sys_{parsed.name}"
            signature = parse_syscall_signature(header_content, entry_point)

            if signature:
                parameters = signature.parameters
            else:
                parameters = None
                self.logger.debug(f"No signature found for {entry_point}")

            result.append(
                {
                    "name": parsed.name,
                    "index": parsed.index,
                    "entry_point": entry_point,
                    "parameters": parameters,
                }
            )

        return result

    def _transform_and_visit(self, kernel_info_list: List[KernelInfo], syscall_data: Dict[str, List[dict]]):
        """Wrap syscall data in SyscallTableNodes and run the Merkle visitor.

        Args:
            kernel_info_list: List of kernel information
            syscall_data: Dictionary mapping kernel version to syscall list
        """
        if not syscall_data:
            self.logger.info("No syscall data to transform")
            return

        # Visitor is created with thread=True, as in the original pipeline.
        with SyscallsMerkleVisitor(thread=True) as visitor:
            for info in kernel_info_list:
                entries = syscall_data.get(info.version)
                if not entries:
                    # No data was extracted for this kernel version.
                    continue

                node = SyscallTableNode(architecture=info.architecture, syscalls=entries)

                self.logger.info(
                    f"Creating SyscallTableNode for {info.version} "
                    f"({info.architecture}) with {len(entries)} syscalls"
                )
                visitor.run_visit(node)

                # Visited nodes are inspected; only those exposing an
                # `architecture` attribute (the table-level nodes) are logged.
                for visited in visitor.as_gen():
                    merkle = visited.return_value
                    if hasattr(merkle, "architecture"):
                        self.logger.info(
                            f"Created SyscallTableMerkleNode: hash={merkle.hash[:8]}... "
                            f"arch={merkle.architecture} children={len(merkle.children)}"
                        )
                        # TODO: Insert into Neo4j (next phase)
@@ -0,0 +1,25 @@
1
+ """Syscall extraction specific exceptions."""
2
+
3
+
4
class SyscallExtractionError(Exception):
    """Base exception for syscall extraction errors."""


class KernelVersionNotFoundError(SyscallExtractionError):
    """Kernel version does not exist in the repository."""


class PreKernel2011Error(SyscallExtractionError):
    """Kernel version predates 2011 syscall table format (not supported)."""


class SyscallFileNotFoundError(SyscallExtractionError):
    """Required syscall files not found for this kernel version."""
@@ -0,0 +1,121 @@
1
+ """Filesystem navigation utilities for Linux kernel analysis."""
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import PurePath
5
+ from typing import TYPE_CHECKING, List, Optional
6
+
7
+ import lief
8
+ from neogit.model.merkle import Blob, Tree
9
+
10
+ if TYPE_CHECKING:
11
+ from plugins.types import AbstractPlugin
12
+
13
+
14
@dataclass(frozen=True)
class KernelInfo:
    """Immutable record describing one kernel found in the /boot directory.

    Attributes:
        version: Kernel version tag, e.g. "v5.15"
        blob_hash: Hash of the Neo4j Blob holding the kernel image
        filename: Boot file name, e.g. "vmlinuz-5.15.0-91-generic"
        architecture: Architecture string derived from a lief enum,
            e.g. "x86_64", "AARCH64"
    """

    version: str
    blob_hash: str
    filename: str
    architecture: str
22
+
23
+
24
def detect_kernel_arch(vmlinuz_path: str) -> str:
    """Return the architecture of a kernel image as reported by lief.

    Two container formats are recognised: ELF binaries (e.g. a raw vmlinux)
    and PE binaries (a vmlinuz carrying an EFI boot stub).

    Args:
        vmlinuz_path: Path to vmlinuz file (local filesystem)

    Returns:
        ``str()`` of the lief header enum value
        (e.g., "ARCH.x86_64", "MACHINE_TYPES.AMD64")

    Raises:
        ValueError: If lief cannot parse the file, or the parsed binary is
            neither ELF nor PE
    """
    parsed = lief.parse(vmlinuz_path)
    if parsed is None:
        raise ValueError(f"Failed to parse {vmlinuz_path}")

    if isinstance(parsed, lief.ELF.Binary):
        # ELF header exposes the architecture as `machine_type`.
        arch = parsed.header.machine_type
    elif isinstance(parsed, lief.PE.Binary):
        # PE header exposes the architecture as `machine`.
        arch = parsed.header.machine
    else:
        raise ValueError(f"Unsupported binary format: {vmlinuz_path}")

    return str(arch)
53
+
54
+
55
def get_boot_directory(root: Tree) -> Optional[Tree]:
    """Look up the /boot directory below a root filesystem tree.

    Args:
        root: Root filesystem tree

    Returns:
        The Tree at /boot, or None when the path is missing or resolves to
        something that is not a Tree
    """
    try:
        candidate = root.get_child_at_path(PurePath("/boot"))
    except FileNotFoundError:
        return None

    return candidate if isinstance(candidate, Tree) else None
71
+
72
+
73
def find_kernel_versions(boot: Tree, plugin: "AbstractPlugin") -> List[KernelInfo]:
    """Collect one KernelInfo per distinct kernel version found in /boot.

    Only Blob children whose name starts with "vmlinuz-" are considered. Each
    candidate is downloaded through the plugin so its architecture can be
    detected; candidates whose name or binary cannot be parsed are skipped.

    Args:
        boot: /boot directory tree
        plugin: Plugin instance (provides ``downloaded_file`` and ``logger``)

    Returns:
        List of unique kernel information (version, hash, filename,
        architecture), sorted by the version string
    """
    # Imported here rather than at module level, as in the original code.
    from plugins.syscalls.kernel_parser import parse_kernel_version

    found = []
    known_versions = set()

    for name, child in boot.iter_children():
        if not (isinstance(child, Blob) and name.startswith("vmlinuz-")):
            continue

        try:
            version = parse_kernel_version(name)

            # Several builds of the same version collapse to the first one seen.
            if version in known_versions:
                continue

            with plugin.downloaded_file(child.hash) as local_path:
                try:
                    arch = detect_kernel_arch(local_path)
                except ValueError as err:
                    plugin.logger.warning(f"Failed to detect architecture for {name}: {err}")
                    continue

                found.append(
                    KernelInfo(
                        version=version,
                        blob_hash=child.hash,
                        filename=name,
                        architecture=arch,
                    )
                )
                known_versions.add(version)

        except ValueError:
            # File name did not parse as a kernel version: ignore it.
            continue

    # NOTE(review): this is a plain string sort ("v5.15" sorts before "v5.4");
    # it gives a deterministic order, not a numeric version order.
    return sorted(found, key=lambda k: k.version)
@@ -0,0 +1,36 @@
1
+ """Kernel version and syscall parsing from boot filenames and headers."""
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+
6
# Compiled regex pattern for kernel filename parsing
# Pattern: vmlinuz-{major}.{minor}.{patch}-{build}-{flavor}
# Only {major} and {minor} are captured (groups 1 and 2). A literal "." must
# follow the minor number; everything after it is matched but not captured.
KERNEL_VERSION_PATTERN = re.compile(r"^vmlinuz-(\d+)\.(\d+)\..*")
9
+
10
+
11
@dataclass(frozen=True)
class SyscallIndex:
    """Immutable pairing of a syscall name with its table index.

    Attributes:
        name: Syscall name
        index: Syscall index
    """

    name: str
    index: int
17
+
18
+
19
def parse_kernel_version(filename: str) -> str:
    """Derive the 'v{major}.{minor}' version tag from a vmlinuz file name.

    Args:
        filename: Boot filename like 'vmlinuz-5.15.0-91-generic'

    Returns:
        Kernel version like 'v5.15'

    Raises:
        ValueError: If filename does not match KERNEL_VERSION_PATTERN
    """
    parsed = KERNEL_VERSION_PATTERN.match(filename)
    if parsed is None:
        raise ValueError(f"Invalid kernel filename format: {filename}")

    # Group 1 is the major number, group 2 the minor number.
    return f"v{parsed.group(1)}.{parsed.group(2)}"
@@ -0,0 +1,106 @@
1
+ """Kernel repository management using git show for blob extraction."""
2
+
3
+ from pathlib import Path
4
+
5
+ import git
6
+ from git.exc import GitCommandError
7
+
8
+ from .exceptions import KernelVersionNotFoundError, PreKernel2011Error, SyscallFileNotFoundError
9
+
10
+
11
def ensure_kernel_repo(cache_dir: str) -> git.Repo:
    """Open the cached Linux kernel repository, cloning it first if needed.

    The repository lives in ``<cache_dir>/linux``. Tags are fetched from
    ``origin`` on every call, whether the repository was just cloned or
    already present.

    Args:
        cache_dir: Cache directory path

    Returns:
        Git repository object
    """
    linux_path = Path(cache_dir) / "linux"
    already_cloned = linux_path.exists() and (linux_path / ".git").exists()

    if already_cloned:
        repo = git.Repo(linux_path)
    else:
        repo = git.Repo.clone_from("https://github.com/torvalds/linux.git", linux_path)

    # Fetch tags in both cases (clone doesn't fetch tags by default in some
    # Git versions).
    repo.remotes.origin.fetch(tags=True)
    return repo
35
+
36
+
37
def get_file_content(repo: git.Repo, version: str, file_path: str) -> str:
    """Get file content at specific version using git show.

    Runs ``git show <version>:<file_path>`` and translates recognisable git
    failures into the plugin's own exception types.

    Args:
        repo: Git repository object
        version: Git tag/commit like 'v5.15'
        file_path: File path relative to repo root

    Returns:
        File content as string

    Raises:
        KernelVersionNotFoundError: If kernel version doesn't exist
        SyscallFileNotFoundError: If file doesn't exist at that version
        GitCommandError: Re-raised unchanged when the failure cannot be
            classified
    """
    try:
        return repo.git.show(f"{version}:{file_path}")  # noqa: E231
    except GitCommandError as e:
        # Classify on git's stderr only. str(e) also embeds the command line,
        # which always contains both `version` and `file_path`, so substring
        # checks against it cannot tell the two failure modes apart. Fall back
        # to str(e) if no stderr text is available.
        detail = str(getattr(e, "stderr", "") or e).lower()

        # Unknown tag/commit. "invalid object name" is what git reports for an
        # unknown revision in the <rev>:<path> form.
        revision_markers = ("bad revision", "unknown revision", "invalid object name")
        if any(marker in detail for marker in revision_markers):
            raise KernelVersionNotFoundError(f"Kernel version {version} not found in repository") from e  # noqa: E713

        # Revision resolved but the path is absent from its tree, e.g.
        # "path '...' does not exist in '...'" or
        # "path '...' exists on disk, but not in '...'".
        if "path" in detail and ("not in" in detail or "does not exist" in detail):
            raise SyscallFileNotFoundError(f"File {file_path} not found in version {version}") from e  # noqa: E713

        # Other "does not exist" errors: attribute to the version only when
        # git's own message mentions it.
        if "does not exist" in detail:
            if version.lower() in detail:
                raise KernelVersionNotFoundError(f"Kernel version {version} not found in repository") from e  # noqa: E713
            raise SyscallFileNotFoundError(f"File {file_path} not found in version {version}") from e  # noqa: E713

        # Re-raise original error if we can't classify it
        raise
70
+
71
+
72
def get_syscall_files(repo: git.Repo, version: str) -> tuple[str, str]:
    """Get syscall table and header file contents for a kernel version.

    The x86-64 syscall table is looked up in each known location in turn,
    newest layout first, so that kernels using the older directory layout are
    not mistaken for kernels without a syscall table.

    Args:
        repo: Git repository object
        version: Kernel version like 'v5.15'

    Returns:
        Tuple of (table_content, header_content)

    Raises:
        PreKernel2011Error: If kernel predates 2011 syscall table format
            (no table in any known location, but syscalls.h exists)
        KernelVersionNotFoundError: If kernel version doesn't exist
        SyscallFileNotFoundError: If the table exists but syscalls.h doesn't
    """
    table_paths = (
        # Current location (kernels after the arch/x86/entry reorganisation,
        # around v4.2).
        "arch/x86/entry/syscalls/syscall_64.tbl",
        # Location used when the .tbl format was first introduced (around
        # v3.3) and until the move above.
        "arch/x86/syscalls/syscall_64.tbl",
    )
    header_path = "include/linux/syscalls.h"

    table_content = None
    for table_path in table_paths:
        try:
            table_content = get_file_content(repo, version, table_path)
        except SyscallFileNotFoundError:
            # Not at this location for this version; try the next one.
            continue
        break

    if table_content is None:
        # No table anywhere: use syscalls.h to tell "old kernel" apart from
        # "version does not exist".
        try:
            get_file_content(repo, version, header_path)
        except (SyscallFileNotFoundError, KernelVersionNotFoundError) as err:
            # Neither file exists - version might be invalid
            raise KernelVersionNotFoundError(f"Kernel version {version} not found") from err
        # If syscalls.h exists but no .tbl does, it's pre-2011
        raise PreKernel2011Error(f"Kernel {version} predates 2011 syscall table format")

    # A missing header at this point propagates as SyscallFileNotFoundError.
    header_content = get_file_content(repo, version, header_path)
    return table_content, header_content