dissect.target 3.15.dev23__py3-none-any.whl → 3.15.dev25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dissect/target/plugin.py CHANGED
@@ -951,6 +951,10 @@ class NamespacePlugin(Plugin):
951
951
  # the direct subclass of NamespacePlugin
952
952
  cls.__nsplugin__.SUBPLUGINS.add(cls.__namespace__)
953
953
 
954
+ # Generate a tuple of class names for which we do not want to add subplugin functions, which is the
955
+ # namespaceplugin and all of its superclasses (minus the base object).
956
+ reserved_cls_names = tuple({_class.__name__ for _class in cls.__nsplugin__.mro() if _class is not object})
957
+
954
958
  # Collect the public attrs of the subplugin
955
959
  for subplugin_func_name in cls.__exports__:
956
960
  subplugin_func = inspect.getattr_static(cls, subplugin_func_name)
@@ -963,12 +967,15 @@ class NamespacePlugin(Plugin):
963
967
  if getattr(subplugin_func, "__output__", None) != "record":
964
968
  continue
965
969
 
966
- # The method needs to be part of the current subclass and not a parent
967
- if not subplugin_func.__qualname__.startswith(cls.__name__):
970
+ # The method may not be part of a parent class.
971
+ if subplugin_func.__qualname__.startswith(reserved_cls_names):
968
972
  continue
969
973
 
970
974
  # If we already have an aggregate method, skip
971
975
  if existing_aggregator := getattr(cls.__nsplugin__, subplugin_func_name, None):
976
+ if not hasattr(existing_aggregator, "__subplugins__"):
977
+ # This is not an aggregator, but a re-implementation of a subclass function by the subplugin.
978
+ continue
972
979
  existing_aggregator.__subplugins__.append(cls.__namespace__)
973
980
  continue
974
981
 
@@ -978,10 +985,12 @@ class NamespacePlugin(Plugin):
978
985
  for entry in aggregator.__subplugins__:
979
986
  try:
980
987
  subplugin = getattr(self.target, entry)
981
- for item in getattr(subplugin, method_name)():
982
- yield item
983
- except Exception:
988
+ yield from getattr(subplugin, method_name)()
989
+ except UnsupportedPluginError:
984
990
  continue
991
+ except Exception as e:
992
+ self.target.log.error("Subplugin: %s raised an exception for: %s", entry, method_name)
993
+ self.target.log.debug("Exception: %s", e, exc_info=e)
985
994
 
986
995
  # Holds the subplugins that share this method
987
996
  aggregator.__subplugins__ = []
@@ -1,74 +1,196 @@
1
- import enum
2
1
  import itertools
3
2
  import re
4
3
  from datetime import datetime
5
4
  from pathlib import Path
6
- from typing import Iterator, Optional
5
+ from typing import Iterator, NamedTuple, Optional
7
6
 
8
7
  from dissect.target import plugin
9
8
  from dissect.target.exceptions import FileNotFoundError, UnsupportedPluginError
10
9
  from dissect.target.helpers.fsutil import open_decompress
11
- from dissect.target.plugins.apps.webserver.webserver import WebserverAccessLogRecord
10
+ from dissect.target.plugins.apps.webserver.webserver import (
11
+ WebserverAccessLogRecord,
12
+ WebserverErrorLogRecord,
13
+ WebserverPlugin,
14
+ )
12
15
  from dissect.target.target import Target
13
16
 
14
- COMMON_REGEX = r'(?P<remote_ip>.*?) (?P<remote_logname>.*?) (?P<remote_user>.*?) \[(?P<ts>.*)\] "(?P<method>.*?) (?P<uri>.*?) ?(?P<protocol>HTTP\/.*?)?" (?P<status_code>\d{3}) (?P<bytes_sent>-|\d+)' # noqa: E501
15
- REFERER_USER_AGENT_REGEX = r'"(?P<referer>.*?)" "(?P<useragent>.*?)"'
16
17
 
18
+ class LogFormat(NamedTuple):
19
+ name: str
20
+ pattern: re.Pattern
17
21
 
18
- class LogFormat(enum.Enum):
19
- VHOST_COMBINED = re.compile(rf"(?P<server_name>.*?):(?P<port>.*) {COMMON_REGEX} {REFERER_USER_AGENT_REGEX}")
20
- COMBINED = re.compile(rf"{COMMON_REGEX} {REFERER_USER_AGENT_REGEX}")
21
- COMMON = re.compile(COMMON_REGEX)
22
22
 
23
+ # e.g. CustomLog "/custom/log/location/access.log" common
24
+ RE_CONFIG_CUSTOM_LOG_DIRECTIVE = re.compile(
25
+ r"""
26
+ [\s#]* # Optionally prefixed by space(s) or pound sign(s).
27
+ CustomLog # Directive indicating that a custom access log location / format is used.
28
+ \s
29
+ "?(?P<location>[^"\s]+)"? # Location to log to, optionally wrapped in double quotes.
30
+ \s
31
+ (?P<logformat>[^$]+) # Format to use (can be either a format string or a nickname).
32
+ $
33
+ """,
34
+ re.VERBOSE,
35
+ )
23
36
 
24
- def infer_log_format(line: str) -> Optional[LogFormat]:
25
- """Attempt to infer what standard LogFormat is used. Returns None if no known format can be inferred.
37
+ # e.g. ErrorLog "/var/log/httpd/error_log"
38
+ RE_CONFIG_ERRORLOG_DIRECTIVE = re.compile(
39
+ r"""
40
+ [\s#]* # Optionally prefixed by space(s) or pound sign(s).
41
+ ErrorLog # Directive indicating that a custom error log location / format is used.
42
+ \s
43
+ "?(?P<location>[^"\s$]+)"? # Location to log to, optionally wrapped in double quotes.
44
+ $
45
+ """,
46
+ re.VERBOSE,
47
+ )
26
48
 
27
- Three default log type examples from Apache (note that the ipv4 could also be ipv6)::
28
- combined = '1.2.3.4 - - [19/Dec/2022:17:25:12 +0100] "GET / HTTP/1.1" 304 247 "-" "Mozilla/5.0
29
- (Windows NT 10.0; Win64; x64); AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0
30
- Safari/537.36"'
31
- common = '1.2.3.4 - - [19/Dec/2022:17:25:40 +0100] "GET / HTTP/1.1" 200 312'
32
- vhost_combined = 'example.com:80 1.2.3.4 - - [19/Dec/2022:17:25:40 +0100] "GET / HTTP/1.1" 200 312 "-"
33
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64); AppleWebKit/537.36 (KHTML, like Gecko)
34
- Chrome/108.0.0.0 Safari/537.36"'
35
- """
49
+ RE_REMOTE_PATTERN = r"""
50
+ (?P<remote_ip>.*?) # Client IP address of the request.
51
+ \s
52
+ (?P<remote_logname>.*?) # Remote logname (from identd, if supplied).
53
+ \s
54
+ (?P<remote_user>.*?) # Remote user if the request was authenticated.
55
+ """
36
56
 
37
- first_part = line.split(" ")[0]
38
- if ":" in first_part and "." in first_part:
39
- # does not start with IP, hence it must be a vhost typed log
40
- return LogFormat.VHOST_COMBINED
41
- elif line[-1:] == '"':
42
- # ends with a quotation mark, meaning three is a user agent
43
- return LogFormat.COMBINED
44
- elif line[-1:].isdigit():
45
- return LogFormat.COMMON
46
- return None
57
+ RE_REFERER_USER_AGENT_PATTERN = r"""
58
+ "(?P<referer>.*?)" # Value of the 'Referer' HTTP Header.
59
+ \s
60
+ "(?P<useragent>.*?)" # Value of the 'User-Agent' HTTP Header.
61
+ """
47
62
 
63
+ RE_RESPONSE_TIME_PATTERN = r"""
64
+ (
65
+ "
66
+ Time:\s
67
+ (?P<response_time>.*?) # Time taken to serve the response, including a unit of measurement.
68
+ "
69
+ )
70
+ """
48
71
 
49
- class ApachePlugin(plugin.Plugin):
72
+ RE_ACCESS_COMMON_PATTERN = r"""
73
+ \[(?P<ts>[^\]]*)\] # Timestamp including milliseconds.
74
+ \s
75
+ (\[(?P<pid>[0-9]+)\]\s)? # The process ID of the child that serviced the request (optional).
76
+ "
77
+ (?P<method>.*?) # The HTTP Method used for the request.
78
+ \s
79
+ (?P<uri>.*?) # The HTTP URI of the request.
80
+ \s
81
+ ?(?P<protocol>HTTP\/.*?)? # The request protocol.
82
+ "
83
+ \s
84
+ (?P<status_code>\d{3}) # The HTTP Status Code of the response.
85
+ \s
86
+ (?P<bytes_sent>-|\d+) # Bytes sent, including headers.
87
+ """
88
+
89
+ RE_ERROR_COMMON_PATTERN = r"""
90
+ \[
91
+ (?P<ts>[^\]]*) # Timestamp including milliseconds.
92
+ \]
93
+ \s
94
+ \[
95
+ (?P<module>[^:]*) # Name of the module logging the message.
96
+ \:
97
+ (?P<level>[^]]*) # Loglevel of the message.
98
+ \]
99
+ \s
100
+ \[
101
+ pid\s(?P<pid>\d*) # Process ID of current process.
102
+ (\:tid\s(?P<tid>\d*))? # Thread ID of current thread (optional).
103
+ \]
104
+ \s
105
+ ((?P<error_source>[^\:]*)\:\s)? # Source file name and line number of the log call (optional).
106
+ (
107
+ \[
108
+ client\s(?P<client>[^]]+) # Client IP address and port of the request (optional).
109
+ \]\s
110
+ )?
111
+ ((?P<error_code>\w+)\:\s)? # APR/OS error status code and string (optional).
112
+ (?P<message>.*) # The actual log message.
113
+ """
114
+
115
+ LOG_FORMAT_ACCESS_COMMON = LogFormat(
116
+ "common",
117
+ re.compile(
118
+ rf"{RE_REMOTE_PATTERN}\s{RE_ACCESS_COMMON_PATTERN}",
119
+ re.VERBOSE,
120
+ ),
121
+ )
122
+ LOG_FORMAT_ACCESS_VHOST_COMBINED = LogFormat(
123
+ "vhost_combined",
124
+ re.compile(
125
+ rf"""
126
+ (?P<server_name>.*?):(?P<port>.*)
127
+ \s
128
+ {RE_REMOTE_PATTERN}
129
+ \s
130
+ {RE_ACCESS_COMMON_PATTERN}
131
+ \s
132
+ {RE_REFERER_USER_AGENT_PATTERN}
133
+ """,
134
+ re.VERBOSE,
135
+ ),
136
+ )
137
+ LOG_FORMAT_ACCESS_COMBINED = LogFormat(
138
+ "combined",
139
+ re.compile(
140
+ rf"{RE_REMOTE_PATTERN}\s{RE_ACCESS_COMMON_PATTERN}\s{RE_REFERER_USER_AGENT_PATTERN}",
141
+ re.VERBOSE,
142
+ ),
143
+ )
144
+ LOG_FORMAT_ERROR_COMMON = LogFormat("error", re.compile(RE_ERROR_COMMON_PATTERN, re.VERBOSE))
145
+
146
+
147
+ def apache_response_time_to_ms(time_str: str) -> int:
148
+ """Convert a string containing amount and measurement (e.g. '10000 microsecs') to milliseconds."""
149
+ amount, _, measurement = time_str.partition(" ")
150
+ amount = int(amount)
151
+ if measurement == "microsecs":
152
+ return amount // 1000
153
+ raise ValueError(f"Could not parse {time_str}")
154
+
155
+
156
+ class ApachePlugin(WebserverPlugin):
50
157
  """Apache log parsing plugin.
51
158
 
52
- Apache has three default log formats, which this plugin can all parse automatically. These are::
159
+ Apache has three default access log formats, all of which this plugin can parse automatically. These are::
160
+
53
161
  LogFormat "%v:%p %h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"" vhost_combined
54
162
  LogFormat "%h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"" combined
55
- LogFormat "%h %l %u %t \"%r\" %>s %O" common
163
+ LogFormat "%h %l %u %t \"%r\" %>s %O" common
56
164
 
57
165
  For the definitions of each format string, see https://httpd.apache.org/docs/2.4/mod/mod_log_config.html#formats
58
- """
166
+
167
+ By default, Apache error logs use the following format::
168
+
169
+ ErrorLogFormat ``"[%{u}t] [%-m:%l] [pid %P:tid %T] %7F: %E: [client\ %a] %M% ,\ referer\ %{Referer}i"``
170
+ """ # noqa: E501, W605
59
171
 
60
172
  __namespace__ = "apache"
61
173
 
174
+ DEFAULT_LOG_DIRS = ["/var/log/apache2", "/var/log/apache", "/var/log/httpd", "/var/log"]
175
+ ACCESS_LOG_NAMES = ["access.log", "access_log", "httpd-access.log"]
176
+ ERROR_LOG_NAMES = ["error.log"]
177
+ DEFAULT_CONFIG_PATHS = [
178
+ "/etc/apache2/apache2.conf",
179
+ "/usr/local/etc/apache22/httpd.conf",
180
+ "/etc/httpd/conf/httpd.conf",
181
+ "/etc/httpd.conf",
182
+ ]
183
+
62
184
  def __init__(self, target: Target):
63
185
  super().__init__(target)
64
- self.log_paths = self.get_log_paths()
186
+ self.access_log_paths, self.error_log_paths = self.get_log_paths()
65
187
 
66
188
  def check_compatible(self) -> None:
67
- if not len(self.log_paths):
189
+ if not len(self.access_log_paths) and not len(self.error_log_paths):
68
190
  raise UnsupportedPluginError("No Apache directories found")
69
191
 
70
192
  @plugin.internal
71
- def get_log_paths(self) -> list[Path]:
193
+ def get_log_paths(self) -> tuple[list[Path], list[Path]]:
72
194
  """
73
195
  Discover any present Apache log paths on the target system.
74
196
 
@@ -77,83 +199,175 @@ class ApachePlugin(plugin.Plugin):
77
199
  - https://unix.stackexchange.com/a/269090
78
200
  """
79
201
 
80
- log_paths = []
202
+ access_log_paths = set()
203
+ error_log_paths = set()
81
204
 
82
205
  # Check if any well known default Apache log locations exist
83
- default_log_dirs = ["/var/log/apache2", "/var/log/apache", "/var/log/httpd", "/var/log"]
84
- default_log_names = ["access.log", "access_log", "httpd-access.log"]
85
- for log_dir, log_name in itertools.product(default_log_dirs, default_log_names):
86
- log_paths.extend(self.target.fs.path(log_dir).glob(log_name + "*"))
87
-
88
- # Check default Apache configs for their CustomLog directive
89
- default_config_paths = [
90
- "/etc/apache2/apache2.conf",
91
- "/usr/local/etc/apache22/httpd.conf",
92
- "/etc/httpd/conf/httpd.conf",
93
- ]
94
-
95
- for config in default_config_paths:
206
+ for log_dir, log_name in itertools.product(self.DEFAULT_LOG_DIRS, self.ACCESS_LOG_NAMES):
207
+ access_log_paths.update(self.target.fs.path(log_dir).glob(f"{log_name}*"))
208
+
209
+ for log_dir, log_name in itertools.product(self.DEFAULT_LOG_DIRS, self.ERROR_LOG_NAMES):
210
+ error_log_paths.update(self.target.fs.path(log_dir).glob(f"{log_name}*"))
211
+
212
+ # Check default Apache configs for CustomLog or ErrorLog directives
213
+ for config in self.DEFAULT_CONFIG_PATHS:
96
214
  if (path := self.target.fs.path(config)).exists():
97
215
  for line in path.open("rt"):
98
216
  line = line.strip()
99
217
 
100
- if not line or "CustomLog" not in line:
218
+ if not line or ("CustomLog" not in line and "ErrorLog" not in line):
101
219
  continue
102
220
 
103
- try:
104
- # CustomLog "/custom/log/location/access.log" common
105
- log_path = line.split("CustomLog")[1].strip().split(" ")[0].replace('"', "")
106
- custom_log = self.target.fs.path(log_path)
107
- log_paths.extend(
108
- path for path in custom_log.parent.glob(f"{custom_log.name}*") if path not in log_paths
109
- )
110
- except IndexError:
221
+ if "ErrorLog" in line:
222
+ set_to_update = error_log_paths
223
+ pattern_to_use = RE_CONFIG_ERRORLOG_DIRECTIVE
224
+ else:
225
+ set_to_update = access_log_paths
226
+ pattern_to_use = RE_CONFIG_CUSTOM_LOG_DIRECTIVE
227
+
228
+ match = pattern_to_use.match(line)
229
+ if not match:
111
230
  self.target.log.warning("Unexpected Apache log configuration: %s (%s)", line, path)
231
+ continue
232
+
233
+ directive = match.groupdict()
234
+ custom_log = self.target.fs.path(directive["location"])
235
+ set_to_update.update(path for path in custom_log.parent.glob(f"{custom_log.name}*"))
112
236
 
113
- return log_paths
237
+ return sorted(access_log_paths), sorted(error_log_paths)
114
238
 
115
239
  @plugin.export(record=WebserverAccessLogRecord)
116
240
  def access(self) -> Iterator[WebserverAccessLogRecord]:
117
- """Return contents of Apache access log files in unified WebserverAccessLogRecord format."""
118
- for path in self.log_paths:
241
+ """Return contents of Apache access log files in unified ``WebserverAccessLogRecord`` format."""
242
+ for line, path in self._iterate_log_lines(self.access_log_paths):
243
+ try:
244
+ logformat = self.infer_access_log_format(line)
245
+ if not logformat:
246
+ self.target.log.warning("Apache log format could not be inferred for log line: %s (%s)", line, path)
247
+ continue
248
+
249
+ match = logformat.pattern.match(line)
250
+ if not match:
251
+ self.target.log.warning(
252
+ "Could not match Apache log format %s for log line: %s (%s)", logformat.name, line, path
253
+ )
254
+ continue
255
+
256
+ log = match.groupdict()
257
+ if response_time := log.get("response_time"):
258
+ response_time = apache_response_time_to_ms(response_time)
259
+
260
+ yield WebserverAccessLogRecord(
261
+ ts=datetime.strptime(log["ts"], "%d/%b/%Y:%H:%M:%S %z"),
262
+ remote_user=log["remote_user"],
263
+ remote_ip=log["remote_ip"],
264
+ local_ip=log.get("local_ip"),
265
+ method=log["method"],
266
+ uri=log["uri"],
267
+ protocol=log["protocol"],
268
+ status_code=log["status_code"],
269
+ bytes_sent=log["bytes_sent"].strip("-") or 0,
270
+ pid=log.get("pid"),
271
+ referer=log.get("referer"),
272
+ useragent=log.get("useragent"),
273
+ response_time_ms=response_time,
274
+ source=path,
275
+ _target=self.target,
276
+ )
277
+
278
+ except Exception as e:
279
+ self.target.log.warning("An error occured parsing Apache log file %s: %s", path, str(e))
280
+ self.target.log.debug("", exc_info=e)
281
+
282
+ @plugin.export(record=WebserverErrorLogRecord)
283
+ def error(self) -> Iterator[WebserverErrorLogRecord]:
284
+ """Return contents of Apache error log files in unified ``WebserverErrorLogRecord`` format."""
285
+ for line, path in self._iterate_log_lines(self.error_log_paths):
286
+ try:
287
+ match = LOG_FORMAT_ERROR_COMMON.pattern.match(line)
288
+ if not match:
289
+ self.target.log.warning("Could not match Apache error log format for log line: %s (%s)", line, path)
290
+ continue
291
+
292
+ log = match.groupdict()
293
+ remote_ip = log.get("client")
294
+ if remote_ip and ":" in remote_ip:
295
+ remote_ip, _, port = remote_ip.rpartition(":")
296
+ error_source = log.get("error_source")
297
+ error_code = log.get("error_code")
298
+
299
+ # Both error_source and error_code follow the same logformat. When both are present, the error source
300
+ # goes before the client and the error code goes after. However, it is also possible that only the error
301
+ # code is available, in which case it is situated *after* the client. In such situations our regex match
302
+ # has assigned the variables wrong, and we need to do a swap.
303
+ if error_source and error_code is None:
304
+ error_source, error_code = error_code, error_source
305
+
306
+ # Unlike with access logs, ErrorLogFormat doesn't log the offset to UTC but instead logs in local time.
307
+ ts = self.target.datetime.local(datetime.strptime(log["ts"], "%a %b %d %H:%M:%S.%f %Y"))
308
+
309
+ yield WebserverErrorLogRecord(
310
+ ts=ts,
311
+ pid=log.get("pid"),
312
+ remote_ip=remote_ip,
313
+ module=log["module"],
314
+ level=log["level"],
315
+ error_source=error_source,
316
+ error_code=error_code,
317
+ message=log["message"],
318
+ source=path,
319
+ _target=self.target,
320
+ )
321
+
322
+ except Exception as e:
323
+ self.target.log.warning("An error occured parsing Apache log file %s: %s", path, str(e))
324
+ self.target.log.debug("", exc_info=e)
325
+
326
+ def _iterate_log_lines(self, paths: list[Path]) -> Iterator[tuple[str, Path]]:
327
+ """Iterate through a list of paths and yield tuples of loglines and the path of the file where they're from."""
328
+ for path in paths:
119
329
  try:
120
330
  path = path.resolve(strict=True)
121
331
  for line in open_decompress(path, "rt"):
122
332
  line = line.strip()
123
333
  if not line:
124
334
  continue
335
+ yield line, path
336
+ except FileNotFoundError:
337
+ self.target.log.warning("Apache log file configured but could not be found (dead symlink?): %s", path)
125
338
 
126
- fmt = infer_log_format(line)
127
- if not fmt:
128
- self.target.log.warning(
129
- "Apache log format could not be inferred for log line: %s (%s)", line, path
130
- )
131
- continue
339
+ @staticmethod
340
+ def infer_access_log_format(line: str) -> Optional[LogFormat]:
341
+ """Attempt to infer what standard LogFormat is used. Returns None if no known format can be inferred.
132
342
 
133
- match = fmt.value.match(line)
134
- if not match:
135
- self.target.log.warning(
136
- "Could not match Apache log format %s for log line: %s (%s)", fmt, line, path
137
- )
138
- continue
343
+ Three default log type examples from Apache (note that the IPv4 address could also be an IPv6 address):
139
344
 
140
- log = match.groupdict()
141
- yield WebserverAccessLogRecord(
142
- ts=datetime.strptime(log["ts"], "%d/%b/%Y:%H:%M:%S %z"),
143
- remote_user=log["remote_user"],
144
- remote_ip=log["remote_ip"],
145
- method=log["method"],
146
- uri=log["uri"],
147
- protocol=log["protocol"],
148
- status_code=log["status_code"],
149
- bytes_sent=log["bytes_sent"].strip("-") or 0,
150
- referer=log.get("referer"),
151
- useragent=log.get("useragent"),
152
- source=path,
153
- _target=self.target,
154
- )
155
- except FileNotFoundError:
156
- self.target.log.warning("Apache log file configured but could not be found (dead symlink?): %s", path)
157
- except Exception as e:
158
- self.target.log.warning("An error occured parsing Apache log file %s: %s", path, str(e))
159
- self.target.log.debug("", exc_info=e)
345
+
346
+ Combined::
347
+
348
+ 1.2.3.4 - - [19/Dec/2022:17:25:12 +0100] "GET / HTTP/1.1" 304 247 "-" "Mozilla/5.0
349
+ (Windows NT 10.0; Win64; x64); AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0
350
+ Safari/537.36\"
351
+
352
+ Common::
353
+
354
+ 1.2.3.4 - - [19/Dec/2022:17:25:40 +0100] "GET / HTTP/1.1" 200 312
355
+
356
+ vhost_combined::
357
+
358
+ example.com:80 1.2.3.4 - - [19/Dec/2022:17:25:40 +0100] "GET / HTTP/1.1" 200 312 "-"
359
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64); AppleWebKit/537.36 (KHTML, like Gecko)
360
+ Chrome/108.0.0.0 Safari/537.36\"
361
+ """
362
+ parts = line.split()
363
+ first_part = parts[0]
364
+ if ":" in first_part and "." in first_part:
365
+ # does not start with IP, hence it must be a vhost typed log
366
+ return LOG_FORMAT_ACCESS_VHOST_COMBINED
367
+ elif line[-1] == '"':
368
+ # ends with a quotation mark but does not contain a response time, meaning there is only a user agent
369
+ return LOG_FORMAT_ACCESS_COMBINED
370
+ elif line[-1].isdigit():
371
+ return LOG_FORMAT_ACCESS_COMMON
372
+
373
+ return None
@@ -9,7 +9,10 @@ from dissect.util.ts import from_unix
9
9
  from dissect.target import plugin
10
10
  from dissect.target.exceptions import FileNotFoundError, UnsupportedPluginError
11
11
  from dissect.target.helpers.fsutil import basename, open_decompress
12
- from dissect.target.plugins.apps.webserver.webserver import WebserverAccessLogRecord
12
+ from dissect.target.plugins.apps.webserver.webserver import (
13
+ WebserverAccessLogRecord,
14
+ WebserverPlugin,
15
+ )
13
16
  from dissect.target.target import Target
14
17
 
15
18
  LOG_FILE_REGEX = re.compile(r"(log|output file) (?P<log_file>.*)( \{)?$")
@@ -18,7 +21,7 @@ LOG_REGEX = re.compile(
18
21
  )
19
22
 
20
23
 
21
- class CaddyPlugin(plugin.Plugin):
24
+ class CaddyPlugin(WebserverPlugin):
22
25
  __namespace__ = "caddy"
23
26
 
24
27
  def __init__(self, target: Target):
@@ -0,0 +1,82 @@
1
+ import re
2
+
3
+ from dissect.target.exceptions import UnsupportedPluginError
4
+ from dissect.target.plugin import OperatingSystem
5
+ from dissect.target.plugins.apps.webserver.apache import (
6
+ RE_ACCESS_COMMON_PATTERN,
7
+ RE_REFERER_USER_AGENT_PATTERN,
8
+ RE_REMOTE_PATTERN,
9
+ RE_RESPONSE_TIME_PATTERN,
10
+ ApachePlugin,
11
+ LogFormat,
12
+ )
13
+
14
+ LOG_FORMAT_CITRIX_NETSCALER_ACCESS_COMBINED_RESPONSE_TIME = LogFormat(
15
+ "combined_resptime",
16
+ re.compile(
17
+ rf"""
18
+ {RE_REMOTE_PATTERN} # remote_ip, remote_logname, remote_user
19
+ \s
20
+ {RE_ACCESS_COMMON_PATTERN} # Timestamp, pid, method, uri, protocol, status code, bytes_sent
21
+ \s
22
+ {RE_REFERER_USER_AGENT_PATTERN} # Referer, user_agent
23
+ \s
24
+ {RE_RESPONSE_TIME_PATTERN} # Response time
25
+ """,
26
+ re.VERBOSE,
27
+ ),
28
+ )
29
+
30
+ LOG_FORMAT_CITRIX_NETSCALER_ACCESS_COMBINED_RESPONSE_TIME_WITH_HEADERS = LogFormat(
31
+ "combined_resptime_with_citrix_hdrs",
32
+ re.compile(
33
+ rf"""
34
+ (?P<remote_ip>.*?) # Client IP address of the request.
35
+ \s
36
+ ->
37
+ \s
38
+ (?P<local_ip>.*?) # Local IP of the Netscaler.
39
+ \s
40
+ (?P<remote_logname>.*?) # Remote logname (from identd, if supplied).
41
+ \s
42
+ (?P<remote_user>.*?) # Remote user if the request was authenticated.
43
+ \s
44
+ {RE_ACCESS_COMMON_PATTERN} # Timestamp, pid, method, uri, protocol, status code, bytes_sent
45
+ \s
46
+ {RE_REFERER_USER_AGENT_PATTERN} # Referer, user_agent
47
+ \s
48
+ {RE_RESPONSE_TIME_PATTERN} # Response time
49
+ """,
50
+ re.VERBOSE,
51
+ ),
52
+ )
53
+
54
+
55
+ class CitrixWebserverPlugin(ApachePlugin):
56
+ """Apache log parsing plugin for Citrix specific logs.
57
+
58
+ Citrix uses Apache with custom access log formats. These are::
59
+
60
+ LogFormat "%{Citrix-ns-orig-srcip}i -> %{Citrix-ns-orig-destip}i %l %u %t [%P] \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"Time: %D microsecs\"" combined_resptime_with_citrix_hdrs
61
+ LogFormat "%a %l %u %t [%P] \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"Time: %D microsecs\"" combined_resptime
62
+ """ # noqa: E501, W605
63
+
64
+ __namespace__ = "citrix"
65
+
66
+ ACCESS_LOG_NAMES = ApachePlugin.ACCESS_LOG_NAMES + ["httpaccess.log", "httpaccess-vpn.log"]
67
+ ERROR_LOG_NAMES = ApachePlugin.ERROR_LOG_NAMES + ["httperror.log", "httperror-vpn.log"]
68
+
69
+ def check_compatible(self) -> None:
70
+ if not self.target.os == OperatingSystem.CITRIX:
71
+ raise UnsupportedPluginError("Target is not a Citrix Netscaler")
72
+
73
+ @staticmethod
74
+ def infer_access_log_format(line: str) -> LogFormat:
75
+ splitted_line = line.split()
76
+ second_part = splitted_line[1]
77
+ if second_part == "->":
78
+ return LOG_FORMAT_CITRIX_NETSCALER_ACCESS_COMBINED_RESPONSE_TIME_WITH_HEADERS
79
+ if "Time: " in line:
80
+ return LOG_FORMAT_CITRIX_NETSCALER_ACCESS_COMBINED_RESPONSE_TIME
81
+
82
+ return ApachePlugin.infer_access_log_format(line)
@@ -11,7 +11,10 @@ from dissect.target import plugin
11
11
  from dissect.target.exceptions import FileNotFoundError as DissectFileNotFoundError
12
12
  from dissect.target.exceptions import PluginError, UnsupportedPluginError
13
13
  from dissect.target.helpers.record import TargetRecordDescriptor
14
- from dissect.target.plugins.apps.webserver.webserver import WebserverAccessLogRecord
14
+ from dissect.target.plugins.apps.webserver.webserver import (
15
+ WebserverAccessLogRecord,
16
+ WebserverPlugin,
17
+ )
15
18
 
16
19
  LOG_RECORD_NAME = "filesystem/windows/iis/logs"
17
20
 
@@ -41,7 +44,7 @@ BasicRecordDescriptor = TargetRecordDescriptor(LOG_RECORD_NAME, BASIC_RECORD_FIE
41
44
  FIELD_NAME_INVALID_CHARS_RE = re.compile(r"[^a-zA-Z0-9]")
42
45
 
43
46
 
44
- class IISLogsPlugin(plugin.Plugin):
47
+ class IISLogsPlugin(WebserverPlugin):
45
48
  """IIS 7 (and above) logs plugin.
46
49
 
47
50
  References:
@@ -6,7 +6,10 @@ from typing import Iterator
6
6
  from dissect.target import plugin
7
7
  from dissect.target.exceptions import FileNotFoundError, UnsupportedPluginError
8
8
  from dissect.target.helpers.fsutil import open_decompress
9
- from dissect.target.plugins.apps.webserver.webserver import WebserverAccessLogRecord
9
+ from dissect.target.plugins.apps.webserver.webserver import (
10
+ WebserverAccessLogRecord,
11
+ WebserverPlugin,
12
+ )
10
13
  from dissect.target.target import Target
11
14
 
12
15
  LOG_REGEX = re.compile(
@@ -14,7 +17,7 @@ LOG_REGEX = re.compile(
14
17
  )
15
18
 
16
19
 
17
- class NginxPlugin(plugin.Plugin):
20
+ class NginxPlugin(WebserverPlugin):
18
21
  __namespace__ = "nginx"
19
22
 
20
23
  def __init__(self, target: Target):
@@ -1,16 +1,16 @@
1
- from typing import Iterator
1
+ from typing import Iterator, Union
2
2
 
3
- from dissect.target.exceptions import UnsupportedPluginError
4
3
  from dissect.target.helpers.record import TargetRecordDescriptor
5
- from dissect.target.plugin import Plugin, export
6
- from dissect.target.target import Target
4
+ from dissect.target.plugin import NamespacePlugin, export
7
5
 
8
6
  WebserverAccessLogRecord = TargetRecordDescriptor(
9
- "application/log/webserver",
7
+ "application/log/webserver/access",
10
8
  [
11
9
  ("datetime", "ts"),
12
10
  ("string", "remote_user"),
13
11
  ("net.ipaddress", "remote_ip"),
12
+ ("net.ipaddress", "local_ip"),
13
+ ("varint", "pid"),
14
14
  ("string", "method"),
15
15
  ("uri", "uri"),
16
16
  ("string", "protocol"),
@@ -18,49 +18,33 @@ WebserverAccessLogRecord = TargetRecordDescriptor(
18
18
  ("varint", "bytes_sent"),
19
19
  ("uri", "referer"),
20
20
  ("string", "useragent"),
21
+ ("varint", "response_time_ms"),
21
22
  ("path", "source"),
22
23
  ],
23
24
  )
24
25
 
26
+ WebserverErrorLogRecord = TargetRecordDescriptor(
27
+ "application/log/webserver/error",
28
+ [
29
+ ("datetime", "ts"),
30
+ ("net.ipaddress", "remote_ip"),
31
+ ("varint", "pid"),
32
+ ("string", "module"),
33
+ ("string", "level"),
34
+ ("string", "error_source"),
35
+ ("string", "error_code"),
36
+ ("string", "message"),
37
+ ("path", "source"),
38
+ ],
39
+ )
25
40
 
26
- class WebserverPlugin(Plugin):
41
+
42
+ class WebserverPlugin(NamespacePlugin):
27
43
  __namespace__ = "webserver"
28
44
  __findable__ = False
29
45
 
30
- WEBSERVERS = [
31
- "apache",
32
- "nginx",
33
- "iis",
34
- "caddy",
35
- ]
36
-
37
- def __init__(self, target: Target):
38
- super().__init__(target)
39
- self._plugins = []
40
- for entry in self.WEBSERVERS:
41
- try:
42
- self._plugins.append(getattr(self.target, entry))
43
- except Exception: # noqa
44
- target.log.exception("Failed to load webserver plugin: %s", entry)
45
-
46
- def check_compatible(self) -> None:
47
- if not len(self._plugins):
48
- raise UnsupportedPluginError("No compatible webserver plugins found")
49
-
50
- def _func(self, f: str) -> Iterator[WebserverAccessLogRecord]:
51
- for p in self._plugins:
52
- try:
53
- yield from getattr(p, f)()
54
- except Exception:
55
- self.target.log.exception("Failed to execute webserver plugin: %s.%s", p._name, f)
56
-
57
- @export(record=WebserverAccessLogRecord)
58
- def logs(self) -> Iterator[WebserverAccessLogRecord]:
46
+ @export(record=[WebserverAccessLogRecord, WebserverErrorLogRecord])
47
+ def logs(self) -> Iterator[Union[WebserverAccessLogRecord, WebserverErrorLogRecord]]:
59
48
  """Returns log file records from installed webservers."""
60
49
  yield from self.access()
61
- # TODO: In the future we should add error logs too.
62
-
63
- @export(record=WebserverAccessLogRecord)
64
- def access(self) -> Iterator[WebserverAccessLogRecord]:
65
- """Returns WebserverAccessLogRecord records from installed webservers."""
66
- yield from self._func("access")
50
+ yield from self.error()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dissect.target
3
- Version: 3.15.dev23
3
+ Version: 3.15.dev25
4
4
  Summary: This module ties all other Dissect modules together, it provides a programming API and command line tools which allow easy access to various data sources inside disk images or file collections (a.k.a. targets)
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -3,7 +3,7 @@ dissect/target/container.py,sha256=9ixufT1_0WhraqttBWwQjG80caToJqvCX8VjFk8d5F0,9
3
3
  dissect/target/exceptions.py,sha256=VVW_Rq_vQinapz-2mbJ3UkxBEZpb2pE_7JlhMukdtrY,2877
4
4
  dissect/target/filesystem.py,sha256=r7JxYP1oI6fy6F29-7FCZZkldnn516d5_XQ7QhQHnH4,53765
5
5
  dissect/target/loader.py,sha256=0-LcZNi7S0qsXR7XGtrzxpuCh9BsLcqNR1T15O7SnBM,7257
6
- dissect/target/plugin.py,sha256=MyBjC7uJ-qml9SQMQ6xsNMdudsOFNJiHNMRn-AFi_pM,47405
6
+ dissect/target/plugin.py,sha256=vEk-jZdhPKhD7rxRuWGb9XAjHRXewWjflC03qOIF3rI,48113
7
7
  dissect/target/report.py,sha256=06uiP4MbNI8cWMVrC1SasNS-Yg6ptjVjckwj8Yhe0Js,7958
8
8
  dissect/target/target.py,sha256=CuqLTD3fwr4HIxtDgN_fwJ3UHSqe5PhNJlLTVGsluB8,31908
9
9
  dissect/target/volume.py,sha256=aQZAJiny8jjwkc9UtwIRwy7nINXjCxwpO-_UDfh6-BA,15801
@@ -136,11 +136,12 @@ dissect/target/plugins/apps/vpn/wireguard.py,sha256=45WvCqQQGrG3DVDH5ghcsGpM_Bom
136
136
  dissect/target/plugins/apps/webhosting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
137
  dissect/target/plugins/apps/webhosting/cpanel.py,sha256=OeFQnu9GmpffIlFyK-AR2Qf8tjyMhazWEAUyccDU5y0,2979
138
138
  dissect/target/plugins/apps/webserver/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
- dissect/target/plugins/apps/webserver/apache.py,sha256=_dZVlQMo5bngVkuQorz6QgjWxDOSLgHm2bpUqVcD2js,7182
140
- dissect/target/plugins/apps/webserver/caddy.py,sha256=Ik0mXNUpYg3dMA5M_8n5zjaFhNd-tYhM7gRPaf9ASa0,6390
141
- dissect/target/plugins/apps/webserver/iis.py,sha256=BVYwLn25n5wjX5X5uedmJOFE7CNae59twUeHuXYkPZc,14637
142
- dissect/target/plugins/apps/webserver/nginx.py,sha256=nO-YGx68Hw_meSqp2u_d8hQB7NKsx2d09b-H9ngrUKo,4097
143
- dissect/target/plugins/apps/webserver/webserver.py,sha256=_rkI0FRF4b3cUqSix_c00NoPYCfc6_GErt72sP2Jngk,2156
139
+ dissect/target/plugins/apps/webserver/apache.py,sha256=H38Zj41EkfS27x98gBTuPHJmTOmlhfMK73PX6zQ4YOY,14933
140
+ dissect/target/plugins/apps/webserver/caddy.py,sha256=qZsAK_tILGvroV4SWkDKc-Otwd41bUEtv9H9TuHmt-0,6422
141
+ dissect/target/plugins/apps/webserver/citrix.py,sha256=FEPdBteEJeeGg3B95W_27O9wLJVhenEc5A5fSLDmK18,3044
142
+ dissect/target/plugins/apps/webserver/iis.py,sha256=UwRVzLqnKScijdLoZFfpkSUzKTQosicZpn16q__4QBU,14669
143
+ dissect/target/plugins/apps/webserver/nginx.py,sha256=WA5soi1FU1c44oHRcyOoHK3gH8Jzc_Qi5uXcimDYukw,4129
144
+ dissect/target/plugins/apps/webserver/webserver.py,sha256=a7a2lLrhsa9c1AXnwiLP-tqVv-IUbmaVaSZI5S0fKa8,1500
144
145
  dissect/target/plugins/child/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
145
146
  dissect/target/plugins/child/esxi.py,sha256=GfgQzxntcHcyxAE2QjMJ-TrFhklweSXLbYh0uuv-klg,693
146
147
  dissect/target/plugins/child/hyperv.py,sha256=R2qVeu4p_9V53jO-65znN0LwX9v3FVA-9jbbtOQcEz8,2236
@@ -317,10 +318,10 @@ dissect/target/volumes/luks.py,sha256=OmCMsw6rCUXG1_plnLVLTpsvE1n_6WtoRUGQbpmu1z
317
318
  dissect/target/volumes/lvm.py,sha256=wwQVR9I3G9YzmY6UxFsH2Y4MXGBcKL9aayWGCDTiWMU,2269
318
319
  dissect/target/volumes/md.py,sha256=j1K1iKmspl0C_OJFc7-Q1BMWN2OCC5EVANIgVlJ_fIE,1673
319
320
  dissect/target/volumes/vmfs.py,sha256=-LoUbn9WNwTtLi_4K34uV_-wDw2W5hgaqxZNj4UmqAQ,1730
320
- dissect.target-3.15.dev23.dist-info/COPYRIGHT,sha256=m-9ih2RVhMiXHI2bf_oNSSgHgkeIvaYRVfKTwFbnJPA,301
321
- dissect.target-3.15.dev23.dist-info/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
322
- dissect.target-3.15.dev23.dist-info/METADATA,sha256=fDcJUO3AZyaTwQKqSs910EeZLpf8wLhykMH8u3Kff6Y,11113
323
- dissect.target-3.15.dev23.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
324
- dissect.target-3.15.dev23.dist-info/entry_points.txt,sha256=tvFPa-Ap-gakjaPwRc6Fl6mxHzxEZ_arAVU-IUYeo_s,447
325
- dissect.target-3.15.dev23.dist-info/top_level.txt,sha256=Mn-CQzEYsAbkxrUI0TnplHuXnGVKzxpDw_po_sXpvv4,8
326
- dissect.target-3.15.dev23.dist-info/RECORD,,
321
+ dissect.target-3.15.dev25.dist-info/COPYRIGHT,sha256=m-9ih2RVhMiXHI2bf_oNSSgHgkeIvaYRVfKTwFbnJPA,301
322
+ dissect.target-3.15.dev25.dist-info/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
323
+ dissect.target-3.15.dev25.dist-info/METADATA,sha256=EhglYTVaAYzVR_mUdsZKJJDV0Rcmwzsw9D_1mw9u5mg,11113
324
+ dissect.target-3.15.dev25.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
325
+ dissect.target-3.15.dev25.dist-info/entry_points.txt,sha256=tvFPa-Ap-gakjaPwRc6Fl6mxHzxEZ_arAVU-IUYeo_s,447
326
+ dissect.target-3.15.dev25.dist-info/top_level.txt,sha256=Mn-CQzEYsAbkxrUI0TnplHuXnGVKzxpDw_po_sXpvv4,8
327
+ dissect.target-3.15.dev25.dist-info/RECORD,,