oswatcher-plugins 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,119 @@
1
+ """Domain Node and MerkleNode classes for syscall data transformation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ from typing import Iterator, List, Optional
8
+
9
+ from attrs import define, field
10
+ from neogit.core.merkle import MerkleVisitor
11
+ from neogit.core.model import MerkleLabel, MerkleNode, Node
12
+ from neogit.core.visitor import VisitedNode
13
+
14
+
15
@define(auto_attribs=True)
class SyscallNode(Node):
    """Leaf domain node describing a single syscall entry."""

    # Syscall name.
    name: str
    # Index of the syscall; the table visitor sorts children by this value.
    index: int
    # Entry point recorded for the syscall.
    entry_point: str
    # Optional list of parameter strings (None is allowed).
    parameters: Optional[List[str]]

    def iter_child_nodes(self) -> Iterator[Node]:
        """Return an empty iterator, since a syscall has no child nodes."""
        no_children: List[Node] = []
        return iter(no_children)
27
+
28
+
29
@define(auto_attribs=True)
class SyscallTableNode(Node):
    """Internal domain node holding the syscall table of one architecture."""

    architecture: str  # e.g., "lief._lief.ELF.ARCH.x86_64"
    syscalls: List[dict]  # Raw syscall data from extraction

    def iter_child_nodes(self) -> Iterator[Node]:
        """Lazily build one SyscallNode per raw syscall entry, in list order.

        Each raw entry must provide the "name", "index" and "entry_point"
        keys (a missing key raises KeyError during iteration); "parameters"
        is optional and becomes None when absent.
        """
        yield from (
            SyscallNode(
                name=raw["name"],
                index=raw["index"],
                entry_point=raw["entry_point"],
                parameters=raw.get("parameters"),
            )
            for raw in self.syscalls
        )
45
+
46
+
47
@define(auto_attribs=True)
class SyscallMerkleNode(MerkleNode):
    """Content-addressed merkle counterpart of a single syscall.

    All fields declared here are keyword-only.
    """

    # Syscall name.
    name: str = field(kw_only=True)
    # Index of the syscall.
    index: int = field(kw_only=True)
    # Entry point recorded for the syscall.
    entry_point: str = field(kw_only=True)
    # Parameters as a JSON-serialized string.
    parameters: str = field(kw_only=True)
55
+
56
+
57
@define(auto_attribs=True)
class SyscallTableMerkleNode(MerkleNode):
    """Content-addressed merkle counterpart of a syscall table."""

    # Architecture the table belongs to (keyword-only).
    architecture: str = field(kw_only=True)
62
+
63
+
64
class SyscallsMerkleVisitor(MerkleVisitor):
    """Merkle visitor that hashes syscall leaves and syscall tables."""

    def visit_SyscallNode(self, node: SyscallNode, hash_obj: hashlib._Hash, *args, **kwargs):
        """Hash one syscall leaf and wrap it in a SyscallMerkleNode.

        The digest input is "<index>-<name>-<entry_point>-<parameters>",
        where <parameters> is the JSON dump of the parameter list, or an
        empty string when the list is None or empty.
        """
        if node.parameters:
            params_json = json.dumps(node.parameters, sort_keys=True)
        else:
            # None and an empty list both serialize to the empty string.
            params_json = ""

        hash_obj.update(f"{node.index}-{node.name}-{node.entry_point}-{params_json}".encode())

        leaf_fields = {
            "hash": hash_obj.hexdigest(),
            "label": MerkleLabel.Blob,  # Leaf node
            "name": node.name,
            "index": node.index,
            "entry_point": node.entry_point,
            "parameters": params_json,
        }
        merkle_node = SyscallMerkleNode(**leaf_fields)  # type: ignore[call-arg]
        return VisitedNode(node, merkle_node)

    def visit_SyscallTableNode(self, node: SyscallTableNode, hash_obj: hashlib._Hash, *args, **kwargs):
        """Hash a syscall table from its children and its architecture.

        Children are visited in ascending index order. For each child the
        digest is fed "<name><child hash>\\n"; the architecture string is
        fed last. Children are stored keyed by syscall name.
        """
        # Sort by index so the resulting digest does not depend on input order.
        ordered_children = list(node.iter_child_nodes())
        ordered_children.sort(key=lambda child: child.index)

        children = {}
        for child in ordered_children:
            # Visit the child through the visitor's dispatch entry point.
            child_merkle = self.visit(child, *args, **kwargs).return_value

            # Accumulate: syscall_name + hash
            hash_obj.update(f"{child.name}{child_merkle.hash}\n".encode())
            children[child.name] = child_merkle

        # Table metadata goes in after every child contribution.
        hash_obj.update(f"{node.architecture}".encode())

        merkle_node = SyscallTableMerkleNode(  # type: ignore[call-arg]
            hash=hash_obj.hexdigest(),
            children=children,
            label=MerkleLabel.Tree,  # Internal node
            architecture=node.architecture,
        )
        return VisitedNode(node, merkle_node)
@@ -0,0 +1,46 @@
1
+ """Syscall table parsing from syscall_64.tbl format."""
2
+
3
+ import re
4
+ from typing import Optional
5
+
6
+ from .kernel_parser import SyscallIndex
7
+
8
# Pre-compiled matcher for one data row of a syscall table.
# Row layout: <number> <abi> <name> [<entry_point>] [optional_fields...]
SYSCALL_TABLE_PATTERN = re.compile(
    r"^(\d+)"  # group 1: syscall number
    r"\s+(common|64|x32)"  # group 2: ABI
    r"\s+(\w+)"  # group 3: syscall name
    r"(?:\s+(\w+))?"  # group 4: optional entry point
)
11
+
12
+
13
+ def parse_syscall_table_line(line: str) -> Optional[SyscallIndex]:
14
+ """Parse a single line from syscall_64.tbl format.
15
+
16
+ Args:
17
+ line: Table line like '0\tcommon\tread\tsys_read'
18
+
19
+ Returns:
20
+ SyscallIndex instance or None for filtered/invalid lines
21
+
22
+ Raises:
23
+ ValueError: If line format is invalid but not filtered
24
+ """
25
+ line = line.strip()
26
+
27
+ # Filter out empty lines and comments
28
+ if not line or line.startswith("#"):
29
+ return None
30
+
31
+ match = SYSCALL_TABLE_PATTERN.match(line)
32
+ if not match:
33
+ raise ValueError(f"Invalid syscall table line format: {line}")
34
+
35
+ number_str, abi, name, entry_point = match.groups()
36
+
37
+ # Some syscalls don't have entry points defined (like uselib)
38
+ if entry_point is None:
39
+ entry_point = f"sys_{name}"
40
+
41
+ # Filter out x32 ABI for 64-bit focus
42
+ if abi == "x32":
43
+ return None
44
+
45
+ index = int(number_str)
46
+ return SyscallIndex(name=name, index=index)
@@ -0,0 +1,52 @@
1
+ """Syscalls.h signature parsing from kernel headers."""
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import List, Optional
6
+
7
+
8
@dataclass(frozen=True)
class SyscallSignature:
    """Immutable record pairing a syscall name with its parameter list."""

    # Syscall name.
    name: str
    # Parameter declarations, one string per parameter.
    parameters: List[str]
14
+
15
+
16
+ def parse_syscall_signature(syscalls_h_content: str, entry_name: str) -> Optional[SyscallSignature]:
17
+ """Parse syscall signature for a specific entry from syscalls.h content.
18
+
19
+ Args:
20
+ syscalls_h_content: Content of syscalls.h file
21
+ entry_name: Entry point name like 'sys_read'
22
+
23
+ Returns:
24
+ SyscallSignature instance or None if not found
25
+ """
26
+ # Use Filippo's regex pattern: (?:asmlinkage|unsigned) long {entry}\(([^)]+)\);
27
+ pattern = rf"(?:asmlinkage|unsigned) long {re.escape(entry_name)}\(([^)]+)\)" # noqa: E231
28
+
29
+ # Make multiline and handle whitespace
30
+ match = re.search(pattern, syscalls_h_content, re.MULTILINE | re.DOTALL)
31
+
32
+ if not match:
33
+ return None
34
+
35
+ params_str = match.group(1)
36
+
37
+ # Handle void case
38
+ if params_str.strip() == "void":
39
+ parameters: list = []
40
+ else:
41
+ # Split by comma and clean up
42
+ parameters = []
43
+ for param in params_str.split(","):
44
+ param = param.strip()
45
+ # Remove __user qualifier
46
+ param = param.replace("__user ", "")
47
+ parameters.append(param)
48
+
49
+ # Extract syscall name from entry_name (remove sys_ prefix)
50
+ name = entry_name.replace("sys_", "")
51
+
52
+ return SyscallSignature(name=name, parameters=parameters)
plugins/types.py ADDED
@@ -0,0 +1,108 @@
1
+ # define abstract Plugin Class
2
+
3
+ import logging
4
+ import tempfile
5
+ from abc import abstractmethod
6
+ from contextlib import contextmanager, suppress
7
+ from datetime import datetime, timezone
8
+ from typing import Any, List
9
+
10
+ from attrs import Factory, define, field
11
+ from neogit.model.neo import Commit, PluginRun
12
+ from neogit.service import Neogit
13
+ from neogit.utils import BetterContextManager
14
+
15
# Cypher template that creates a single-property uniqueness constraint only
# if it does not exist yet. Placeholders filled via str.format: {name}
# (constraint name), {label} (node label), {property} (property name).
QUERY_CREATE_UNIQUE_CONSTRAINT = "\n".join(
    (
        "CREATE CONSTRAINT {name}",
        "IF NOT EXISTS",
        "FOR (n:{label})",
        "REQUIRE n.{property} IS UNIQUE",
    )
)
21
+
22
+
23
@define(auto_attribs=True)
class UniqueConstraint:
    """Describes uniqueness constraints to create for one node label."""

    # Node label the constraints apply to.
    label: str = field()
    # Property names; one uniqueness constraint is created per property.
    property_list: list[str] = field()
27
+
28
+
29
@define(auto_attribs=True)
class AbstractPlugin(BetterContextManager):
    """Base class for plugins that are executed against a commit.

    Calling an instance ensures the plugin's constraints exist, runs the
    plugin and records the execution time on the commit's PluginRun node
    under the attribute named after the plugin.
    """

    # Logger named "<module>.<concrete class name>".
    logger: logging.Logger = field(
        init=False,
        default=Factory(
            lambda self: logging.getLogger(f"{self.__module__}.{self.__class__.__name__}"),
            takes_self=True,
        ),
    )
    # NOTE(review): Neogit() is evaluated once, when this class body runs, so
    # the same instance is the default of every plugin - confirm this sharing
    # is intended.
    neogit: Neogit = field(init=False, default=Neogit())

    def __call__(self, commit: Commit, plugin_name: str, *args: Any, force: bool = False, **kwds: Any) -> Any:
        """Run the plugin on a commit and record the run on its PluginRun node.

        Args:
            commit: The commit to process
            plugin_name: Name of the plugin being run; also the PluginRun
                attribute holding the execution datetime
            force: If True, rerun the plugin even if already executed

        Returns:
            None. The call returns early, without running the plugin, when
            a previous run is recorded and ``force`` is False.
        """
        # IndexError means the commit has no PluginRun node yet: nothing to check.
        with suppress(IndexError):
            plugin_run_node = commit.plugin.all()[0]
            plugin_date = getattr(plugin_run_node, plugin_name, None)
            if plugin_date is not None:
                if not force:
                    # Report the timestamp recorded for *this* plugin.
                    self.logger.info(
                        "Plugin run node already executed at %s for commit %s", plugin_date, commit.hash
                    )
                    return
                self.logger.info(
                    "Force rerun enabled - re-executing plugin for commit %s (previously run at %s)",
                    commit.hash,
                    plugin_date,
                )
        # can't mix schema modification and write query in the same transaction
        with self.neogit.db.transaction:
            self.ensure_constraints()
        # TODO: fix transaction
        # with self.neogit.db.transaction:
        self.run(commit)
        with self.neogit.db.write_transaction:
            try:
                plugin_run_node = commit.plugin.all()[0]
                self.logger.info("Plugin run node already exists for commit %s", commit.hash)
            except IndexError:
                self.logger.info("Creating plugin run node for commit %s", commit.hash)
                plugin_run_node = PluginRun()
                # node needs to be saved for the connection to be created as well
                plugin_run_node.save()
                # ensure connected
                commit.plugin.connect(plugin_run_node)
            # update plugin run with datetime (timezone-aware UTC)
            setattr(plugin_run_node, plugin_name, datetime.now(timezone.utc))
            plugin_run_node.save()

        self.logger.info("Plugin run node updated for commit %s", commit.hash)

    def ensure_constraints(self):
        """Create every uniqueness constraint declared by constraints_data().

        One query is issued per (label, property) pair; the constraint is
        named "<lowercased label>_<property>_unique".
        """
        for constraint in self.constraints_data():
            for prop in constraint.property_list:
                name = f"{constraint.label.lower()}_{prop}_unique"
                query = QUERY_CREATE_UNIQUE_CONSTRAINT.format(name=name, label=constraint.label, property=prop)
                self.neogit.db.cypher_query(query)

    def constraints_data(self) -> List[UniqueConstraint]:
        """Return the constraints required by the plugin (none by default)."""
        return []

    @abstractmethod
    def run(self, commit: Commit):
        """Run the plugin on the given commit."""
        pass

    @contextmanager
    def downloaded_file(self, hash: str):
        """Stream an object into a temporary file and yield the file's path.

        Args:
            hash: Identifier passed to Neogit.download_object_as_stream

        Yields:
            Path of the temporary file; the file is removed when the
            context exits.
        """
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            for chunk in self.neogit.download_object_as_stream(hash):
                tmp_file.write(chunk)
            # Flush so the content is visible to readers that open the path.
            tmp_file.flush()
            yield tmp_file.name