oswatcher-plugins 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oswatcher_plugins-0.14.0.dist-info/METADATA +79 -0
- oswatcher_plugins-0.14.0.dist-info/RECORD +26 -0
- oswatcher_plugins-0.14.0.dist-info/WHEEL +4 -0
- oswatcher_plugins-0.14.0.dist-info/entry_points.txt +3 -0
- oswatcher_plugins-0.14.0.dist-info/licenses/LICENSE +202 -0
- plugins/__init__.py +0 -0
- plugins/__main__.py +69 -0
- plugins/config/__init__.py +18 -0
- plugins/config/default_settings.toml +1 -0
- plugins/plugins/__init__.py +29 -0
- plugins/plugins/filetype.py +58 -0
- plugins/plugins/linux_symbols.py +271 -0
- plugins/plugins/linux_symbols_service.py +373 -0
- plugins/plugins/registry.py +224 -0
- plugins/plugins/symbols.py +475 -0
- plugins/plugins/symbols_repository.py +142 -0
- plugins/plugins/symbols_service.py +47 -0
- plugins/plugins/syscalls.py +180 -0
- plugins/syscalls/exceptions.py +25 -0
- plugins/syscalls/filesystem.py +121 -0
- plugins/syscalls/kernel_parser.py +36 -0
- plugins/syscalls/kernel_repo_manager.py +106 -0
- plugins/syscalls/nodes.py +119 -0
- plugins/syscalls/syscall_table_parser.py +46 -0
- plugins/syscalls/syscalls_h_parser.py +52 -0
- plugins/types.py +108 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Domain Node and MerkleNode classes for syscall data transformation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
from typing import Iterator, List, Optional
|
|
8
|
+
|
|
9
|
+
from attrs import define, field
|
|
10
|
+
from neogit.core.merkle import MerkleVisitor
|
|
11
|
+
from neogit.core.model import MerkleLabel, MerkleNode, Node
|
|
12
|
+
from neogit.core.visitor import VisitedNode
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@define(auto_attribs=True)
class SyscallNode(Node):
    """Leaf node describing one syscall entry.

    Attributes are declared in positional-init order: ``name``, ``index``,
    ``entry_point`` and ``parameters`` (``None`` when no parameter list is
    available).
    """

    name: str
    index: int
    entry_point: str
    parameters: Optional[List[str]]

    def iter_child_nodes(self) -> Iterator[Node]:
        """Return an empty iterator, since a syscall has no child nodes."""
        return iter(())
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@define(auto_attribs=True)
class SyscallTableNode(Node):
    """Internal node holding the syscall table of one architecture."""

    architecture: str  # e.g., "lief._lief.ELF.ARCH.x86_64"
    syscalls: List[dict]  # Raw syscall data from extraction

    def iter_child_nodes(self) -> Iterator[Node]:
        """Lazily build one SyscallNode per raw entry of ``self.syscalls``.

        Each entry must provide the keys ``"name"``, ``"index"`` and
        ``"entry_point"``; ``"parameters"`` is optional and defaults to None.
        """
        for entry in self.syscalls:
            child = SyscallNode(
                name=entry["name"],
                index=entry["index"],
                entry_point=entry["entry_point"],
                parameters=entry.get("parameters"),
            )
            yield child
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@define(auto_attribs=True)
class SyscallMerkleNode(MerkleNode):
    """Content-addressed merkle counterpart of a single syscall.

    All fields are keyword-only and extend the fields inherited from
    ``MerkleNode``.
    """

    name: str = field(kw_only=True)
    index: int = field(kw_only=True)
    entry_point: str = field(kw_only=True)
    # Parameter list stored as a JSON string rather than a Python list
    parameters: str = field(kw_only=True)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@define(auto_attribs=True)
class SyscallTableMerkleNode(MerkleNode):
    """Content-addressed merkle counterpart of a syscall table."""

    # Keyword-only architecture identifier, added to the inherited MerkleNode fields
    architecture: str = field(kw_only=True)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class SyscallsMerkleVisitor(MerkleVisitor):
    """Merkle visitor producing content hashes for syscalls and syscall tables."""

    def visit_SyscallNode(self, node: SyscallNode, hash_obj: hashlib._Hash, *args, **kwargs):
        """Hash one syscall leaf and wrap it in a SyscallMerkleNode.

        The digest is fed with ``index-name-entry_point-parameters``, where the
        parameters are JSON-encoded in their existing list order.

        Args:
            node: Syscall leaf to hash.
            hash_obj: Hash object updated in place with the syscall data.

        Returns:
            VisitedNode pairing ``node`` with its SyscallMerkleNode.
        """
        if node.parameters:
            params_json = json.dumps(node.parameters, sort_keys=True)
        else:
            # None and an empty list are both recorded as the empty string
            params_json = ""

        payload = f"{node.index}-{node.name}-{node.entry_point}-{params_json}"
        hash_obj.update(payload.encode())

        leaf_merkle = SyscallMerkleNode(  # type: ignore[call-arg]
            hash=hash_obj.hexdigest(),
            label=MerkleLabel.Blob,  # Leaf node
            name=node.name,
            index=node.index,
            entry_point=node.entry_point,
            parameters=params_json,
        )
        return VisitedNode(node, leaf_merkle)

    def visit_SyscallTableNode(self, node: SyscallTableNode, hash_obj: hashlib._Hash, *args, **kwargs):
        """Hash a syscall table from its children and its architecture.

        Children are visited in ascending syscall index. For each one,
        ``<name><child hash>\\n`` is fed to the digest; the architecture string
        is fed last.

        Args:
            node: Syscall table to hash.
            hash_obj: Hash object updated in place with the table data.

        Returns:
            VisitedNode pairing ``node`` with its SyscallTableMerkleNode, whose
            ``children`` mapping is keyed by syscall name.
        """
        # Index order keeps the table hash independent of extraction order
        ordered_children = list(node.iter_child_nodes())
        ordered_children.sort(key=lambda child: child.index)

        children = {}
        for child in ordered_children:
            # Recursively visit the child to obtain its merkle node
            child_merkle = self.visit(child, *args, **kwargs).return_value
            hash_obj.update(f"{child.name}{child_merkle.hash}\n".encode())
            children[child.name] = child_merkle

        # Table-level metadata goes in after all children
        hash_obj.update(f"{node.architecture}".encode())

        table_merkle = SyscallTableMerkleNode(  # type: ignore[call-arg]
            hash=hash_obj.hexdigest(),
            children=children,
            label=MerkleLabel.Tree,  # Internal node
            architecture=node.architecture,
        )
        return VisitedNode(node, table_merkle)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Syscall table parsing from syscall_64.tbl format."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from .kernel_parser import SyscallIndex
|
|
7
|
+
|
|
8
|
+
# Compiled regex pattern for syscall table line parsing
# Pattern: <number> <abi> <name> [<entry_point>] [optional_fields...]
SYSCALL_TABLE_PATTERN = re.compile(r"^(\d+)\s+(common|64|x32)\s+(\w+)(?:\s+(\w+))?")


def parse_syscall_table_line(line: str) -> Optional["SyscallIndex"]:
    """Parse a single line from syscall_64.tbl format.

    Args:
        line: Table line like '0\\tcommon\\tread\\tsys_read'

    Returns:
        SyscallIndex instance, or None for blank lines, ``#`` comments and
        entries of the x32 ABI.

    Raises:
        ValueError: If a non-blank, non-comment line does not match
            SYSCALL_TABLE_PATTERN.
    """
    line = line.strip()

    # Filter out empty lines and comments
    if not line or line.startswith("#"):
        return None

    match = SYSCALL_TABLE_PATTERN.match(line)
    if match is None:
        raise ValueError(f"Invalid syscall table line format: {line}")

    # The optional entry-point column (group 4) is not part of SyscallIndex,
    # so only the first three captures are read.
    number_str, abi, name = match.group(1, 2, 3)

    # Filter out x32 ABI for 64-bit focus
    if abi == "x32":
        return None

    return SyscallIndex(name=name, index=int(number_str))
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Syscalls.h signature parsing from kernel headers."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
class SyscallSignature:
    """Represents a syscall signature with parameters."""

    name: str
    parameters: List[str]


def parse_syscall_signature(syscalls_h_content: str, entry_name: str) -> Optional[SyscallSignature]:
    """Parse syscall signature for a specific entry from syscalls.h content.

    Args:
        syscalls_h_content: Content of syscalls.h file
        entry_name: Entry point name like 'sys_read'

    Returns:
        SyscallSignature instance or None if no declaration of ``entry_name``
        is found. A ``(void)`` parameter list yields an empty ``parameters``
        list.
    """
    # Filippo's regex pattern: (?:asmlinkage|unsigned) long {entry}\(([^)]+)\);
    # ``[^)]+`` already spans newlines, so multi-line declarations match
    # without any regex flags.
    pattern = rf"(?:asmlinkage|unsigned) long {re.escape(entry_name)}\(([^)]+)\)"  # noqa: E231
    match = re.search(pattern, syscalls_h_content)
    if match is None:
        return None

    params_str = match.group(1)

    if params_str.strip() == "void":
        parameters: List[str] = []
    else:
        # Split by comma, trim surrounding whitespace and drop the __user qualifier
        parameters = [param.strip().replace("__user ", "") for param in params_str.split(",")]

    # Strip only a leading "sys_": str.replace would also remove any later
    # "sys_" inside the name.
    prefix = "sys_"
    name = entry_name[len(prefix):] if entry_name.startswith(prefix) else entry_name

    return SyscallSignature(name=name, parameters=parameters)
|
plugins/types.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# define abstract Plugin Class
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import tempfile
|
|
5
|
+
from abc import abstractmethod
|
|
6
|
+
from contextlib import contextmanager, suppress
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Any, List
|
|
9
|
+
|
|
10
|
+
from attrs import Factory, define, field
|
|
11
|
+
from neogit.model.neo import Commit, PluginRun
|
|
12
|
+
from neogit.service import Neogit
|
|
13
|
+
from neogit.utils import BetterContextManager
|
|
14
|
+
|
|
15
|
+
# Cypher template for a uniqueness constraint that is only created when missing.
# Placeholders filled via str.format: {name}, {label}, {property}.
QUERY_CREATE_UNIQUE_CONSTRAINT = "\n".join(
    (
        "CREATE CONSTRAINT {name}",
        "IF NOT EXISTS",
        "FOR (n:{label})",
        "REQUIRE n.{property} IS UNIQUE",
    )
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@define(auto_attribs=True)
class UniqueConstraint:
    """Uniqueness requirements for the nodes carrying one label."""

    # Node label the constraints apply to
    label: str = field()
    # Property names; AbstractPlugin.ensure_constraints issues one constraint per entry
    property_list: list[str] = field()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@define(auto_attribs=True)
class AbstractPlugin(BetterContextManager):
    """Base class for plugins that process a commit and record when they ran.

    Calling an instance ensures the plugin's constraints exist, invokes
    ``run`` on the commit and stores a UTC timestamp under ``plugin_name`` on
    the commit's PluginRun node. Subclasses implement ``run`` and may override
    ``constraints_data``.
    """

    # Logger named "<module>.<ClassName>" after the concrete plugin class
    logger: logging.Logger = field(
        init=False,
        default=Factory(
            lambda self: logging.getLogger(f"{self.__module__}.{self.__class__.__name__}"),
            takes_self=True,
        ),
    )
    # NOTE(review): this default is built once, when the class body is executed,
    # so all plugin instances share the same Neogit object - confirm intended.
    neogit: Neogit = field(init=False, default=Neogit())

    def __call__(self, commit: Commit, plugin_name: str, *args: Any, force: bool = False, **kwds: Any) -> Any:
        """Run the plugin on a commit unless it has already been executed.

        Constraints are ensured inside a transaction, ``run`` is then called
        (currently outside of any transaction, see TODO below) and finally the
        execution time is written inside a write transaction.

        Args:
            commit: The commit to process
            plugin_name: Name of the plugin being run; also the attribute of
                the PluginRun node that stores the execution datetime
            force: If True, rerun the plugin even if already executed
            *args: Accepted but not used.
            **kwds: Accepted but not used.

        Returns:
            None in every case.
        """
        with suppress(IndexError):
            # IndexError means the commit has no plugin run node yet
            plugin_run_node = commit.plugin.all()[0]
            plugin_date = getattr(plugin_run_node, plugin_name, None)
            if plugin_date is not None:
                if not force:
                    # Report the date stored for THIS plugin, not another plugin's
                    self.logger.info(
                        "Plugin run node already executed at %s for commit %s", plugin_date, commit.hash
                    )
                    return
                self.logger.info(
                    "Force rerun enabled - re-executing plugin for commit %s (previously run at %s)",
                    commit.hash,
                    plugin_date,
                )
        # can't mix schema modification and write query in the same transaction
        with self.neogit.db.transaction:
            self.ensure_constraints()
        # TODO: fix transaction
        # with self.neogit.db.transaction:
        self.run(commit)
        with self.neogit.db.write_transaction:
            try:
                plugin_run_node = commit.plugin.all()[0]
                self.logger.info("Plugin run node already exists for commit %s", commit.hash)
            except IndexError:
                self.logger.info("Creating plugin run node for commit %s", commit.hash)
                plugin_run_node = PluginRun()
                # node needs to be saved for the connection to be created as well
                plugin_run_node.save()
            # ensure connected
            commit.plugin.connect(plugin_run_node)
            # update plugin run with datetime (timezone-aware UTC)
            setattr(plugin_run_node, plugin_name, datetime.now(timezone.utc))
            plugin_run_node.save()

        self.logger.info("Plugin run node updated for commit %s", commit.hash)

    def ensure_constraints(self):
        """Ensure the constraints are in the database.

        Issues one ``CREATE CONSTRAINT ... IF NOT EXISTS`` query per property
        of every constraint returned by ``constraints_data``.
        """
        for constraint in self.constraints_data():
            for prop in constraint.property_list:
                # Constraint name pattern: <lowercased label>_<property>_unique
                name = f"{constraint.label.lower()}_{prop}_unique"
                query = QUERY_CREATE_UNIQUE_CONSTRAINT.format(name=name, label=constraint.label, property=prop)
                self.neogit.db.cypher_query(query)

    def constraints_data(self) -> List[UniqueConstraint]:
        """Return the constraints data; the base implementation declares none."""
        return []

    @abstractmethod
    def run(self, commit: Commit):
        """Run the plugin on ``commit``; must be implemented by subclasses."""

    @contextmanager
    def downloaded_file(self, hash: str):
        """Download an object into a temporary file and yield the file's path.

        Args:
            hash: Identifier passed to ``neogit.download_object_as_stream``.

        Yields:
            Path of the temporary file; the file is deleted when the context
            exits.
        """
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            for chunk in self.neogit.download_object_as_stream(hash):
                tmp_file.write(chunk)
            # Flush so the content is readable through the path by the consumer
            tmp_file.flush()
            yield tmp_file.name
|