dissect.target 3.15.dev23__py3-none-any.whl → 3.15.dev25__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
dissect/target/plugin.py CHANGED
@@ -951,6 +951,10 @@ class NamespacePlugin(Plugin):
951
951
  # the direct subclass of NamespacePlugin
952
952
  cls.__nsplugin__.SUBPLUGINS.add(cls.__namespace__)
953
953
 
954
+ # Generate a tuple of class names for which we do not want to add subplugin functions, which is the
955
+ # namespaceplugin and all of its superclasses (minus the base object).
956
+ reserved_cls_names = tuple({_class.__name__ for _class in cls.__nsplugin__.mro() if _class is not object})
957
+
954
958
  # Collect the public attrs of the subplugin
955
959
  for subplugin_func_name in cls.__exports__:
956
960
  subplugin_func = inspect.getattr_static(cls, subplugin_func_name)
@@ -963,12 +967,15 @@ class NamespacePlugin(Plugin):
963
967
  if getattr(subplugin_func, "__output__", None) != "record":
964
968
  continue
965
969
 
966
- # The method needs to be part of the current subclass and not a parent
967
- if not subplugin_func.__qualname__.startswith(cls.__name__):
970
+ # The method may not be part of a parent class.
971
+ if subplugin_func.__qualname__.startswith(reserved_cls_names):
968
972
  continue
969
973
 
970
974
  # If we already have an aggregate method, skip
971
975
  if existing_aggregator := getattr(cls.__nsplugin__, subplugin_func_name, None):
976
+ if not hasattr(existing_aggregator, "__subplugins__"):
977
+ # This is not an aggregator, but a re-implementation of a subclass function by the subplugin.
978
+ continue
972
979
  existing_aggregator.__subplugins__.append(cls.__namespace__)
973
980
  continue
974
981
 
@@ -978,10 +985,12 @@ class NamespacePlugin(Plugin):
978
985
  for entry in aggregator.__subplugins__:
979
986
  try:
980
987
  subplugin = getattr(self.target, entry)
981
- for item in getattr(subplugin, method_name)():
982
- yield item
983
- except Exception:
988
+ yield from getattr(subplugin, method_name)()
989
+ except UnsupportedPluginError:
984
990
  continue
991
+ except Exception as e:
992
+ self.target.log.error("Subplugin: %s raised an exception for: %s", entry, method_name)
993
+ self.target.log.debug("Exception: %s", e, exc_info=e)
985
994
 
986
995
  # Holds the subplugins that share this method
987
996
  aggregator.__subplugins__ = []
@@ -1,74 +1,196 @@
1
- import enum
2
1
  import itertools
3
2
  import re
4
3
  from datetime import datetime
5
4
  from pathlib import Path
6
- from typing import Iterator, Optional
5
+ from typing import Iterator, NamedTuple, Optional
7
6
 
8
7
  from dissect.target import plugin
9
8
  from dissect.target.exceptions import FileNotFoundError, UnsupportedPluginError
10
9
  from dissect.target.helpers.fsutil import open_decompress
11
- from dissect.target.plugins.apps.webserver.webserver import WebserverAccessLogRecord
10
+ from dissect.target.plugins.apps.webserver.webserver import (
11
+ WebserverAccessLogRecord,
12
+ WebserverErrorLogRecord,
13
+ WebserverPlugin,
14
+ )
12
15
  from dissect.target.target import Target
13
16
 
14
- COMMON_REGEX = r'(?P<remote_ip>.*?) (?P<remote_logname>.*?) (?P<remote_user>.*?) \[(?P<ts>.*)\] "(?P<method>.*?) (?P<uri>.*?) ?(?P<protocol>HTTP\/.*?)?" (?P<status_code>\d{3}) (?P<bytes_sent>-|\d+)' # noqa: E501
15
- REFERER_USER_AGENT_REGEX = r'"(?P<referer>.*?)" "(?P<useragent>.*?)"'
16
17
 
18
+ class LogFormat(NamedTuple):
19
+ name: str
20
+ pattern: re.Pattern
17
21
 
18
- class LogFormat(enum.Enum):
19
- VHOST_COMBINED = re.compile(rf"(?P<server_name>.*?):(?P<port>.*) {COMMON_REGEX} {REFERER_USER_AGENT_REGEX}")
20
- COMBINED = re.compile(rf"{COMMON_REGEX} {REFERER_USER_AGENT_REGEX}")
21
- COMMON = re.compile(COMMON_REGEX)
22
22
 
23
+ # e.g. CustomLog "/custom/log/location/access.log" common
24
+ RE_CONFIG_CUSTOM_LOG_DIRECTIVE = re.compile(
25
+ r"""
26
+ [\s#]* # Optionally prefixed by space(s) or pound sign(s).
27
+ CustomLog # Directive indicating that a custom access log location / format is used.
28
+ \s
29
+ "?(?P<location>[^"\s]+)"? # Location to log to, optionally wrapped in double quotes.
30
+ \s
31
+ (?P<logformat>[^$]+) # Format to use (can be either a format string or a nickname).
32
+ $
33
+ """,
34
+ re.VERBOSE,
35
+ )
23
36
 
24
- def infer_log_format(line: str) -> Optional[LogFormat]:
25
- """Attempt to infer what standard LogFormat is used. Returns None if no known format can be inferred.
37
+ # e.g. ErrorLog "/var/log/httpd/error_log"
38
+ RE_CONFIG_ERRORLOG_DIRECTIVE = re.compile(
39
+ r"""
40
+ [\s#]* # Optionally prefixed by space(s) or pound sign(s).
41
+ ErrorLog # Directive indicating that a custom error log location / format is used.
42
+ \s
43
+ "?(?P<location>[^"\s$]+)"? # Location to log to, optionally wrapped in double quotes.
44
+ $
45
+ """,
46
+ re.VERBOSE,
47
+ )
26
48
 
27
- Three default log type examples from Apache (note that the ipv4 could also be ipv6)::
28
- combined = '1.2.3.4 - - [19/Dec/2022:17:25:12 +0100] "GET / HTTP/1.1" 304 247 "-" "Mozilla/5.0
29
- (Windows NT 10.0; Win64; x64); AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0
30
- Safari/537.36"'
31
- common = '1.2.3.4 - - [19/Dec/2022:17:25:40 +0100] "GET / HTTP/1.1" 200 312'
32
- vhost_combined = 'example.com:80 1.2.3.4 - - [19/Dec/2022:17:25:40 +0100] "GET / HTTP/1.1" 200 312 "-"
33
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64); AppleWebKit/537.36 (KHTML, like Gecko)
34
- Chrome/108.0.0.0 Safari/537.36"'
35
- """
49
+ RE_REMOTE_PATTERN = r"""
50
+ (?P<remote_ip>.*?) # Client IP address of the request.
51
+ \s
52
+ (?P<remote_logname>.*?) # Remote logname (from identd, if supplied).
53
+ \s
54
+ (?P<remote_user>.*?) # Remote user if the request was authenticated.
55
+ """
36
56
 
37
- first_part = line.split(" ")[0]
38
- if ":" in first_part and "." in first_part:
39
- # does not start with IP, hence it must be a vhost typed log
40
- return LogFormat.VHOST_COMBINED
41
- elif line[-1:] == '"':
42
- # ends with a quotation mark, meaning three is a user agent
43
- return LogFormat.COMBINED
44
- elif line[-1:].isdigit():
45
- return LogFormat.COMMON
46
- return None
57
+ RE_REFERER_USER_AGENT_PATTERN = r"""
58
+ "(?P<referer>.*?)" # Value of the 'Referer' HTTP Header.
59
+ \s
60
+ "(?P<useragent>.*?)" # Value of the 'User-Agent' HTTP Header.
61
+ """
47
62
 
63
+ RE_RESPONSE_TIME_PATTERN = r"""
64
+ (
65
+ "
66
+ Time:\s
67
+ (?P<response_time>.*?) # Time taken to serve the response, including a unit of measurement.
68
+ "
69
+ )
70
+ """
48
71
 
49
- class ApachePlugin(plugin.Plugin):
72
+ RE_ACCESS_COMMON_PATTERN = r"""
73
+ \[(?P<ts>[^\]]*)\] # Timestamp including milliseconds.
74
+ \s
75
+ (\[(?P<pid>[0-9]+)\]\s)? # The process ID of the child that serviced the request (optional).
76
+ "
77
+ (?P<method>.*?) # The HTTP Method used for the request.
78
+ \s
79
+ (?P<uri>.*?) # The HTTP URI of the request.
80
+ \s
81
+ ?(?P<protocol>HTTP\/.*?)? # The request protocol.
82
+ "
83
+ \s
84
+ (?P<status_code>\d{3}) # The HTTP Status Code of the response.
85
+ \s
86
+ (?P<bytes_sent>-|\d+) # Bytes sent, including headers.
87
+ """
88
+
89
+ RE_ERROR_COMMON_PATTERN = r"""
90
+ \[
91
+ (?P<ts>[^\]]*) # Timestamp including milliseconds.
92
+ \]
93
+ \s
94
+ \[
95
+ (?P<module>[^:]*) # Name of the module logging the message.
96
+ \:
97
+ (?P<level>[^]]*) # Loglevel of the message.
98
+ \]
99
+ \s
100
+ \[
101
+ pid\s(?P<pid>\d*) # Process ID of current process.
102
+ (\:tid\s(?P<tid>\d*))? # Thread ID of current thread (optional).
103
+ \]
104
+ \s
105
+ ((?P<error_source>[^\:]*)\:\s)? # Source file name and line number of the log call (optional).
106
+ (
107
+ \[
108
+ client\s(?P<client>[^]]+) # Client IP address and port of the request (optional).
109
+ \]\s
110
+ )?
111
+ ((?P<error_code>\w+)\:\s)? # APR/OS error status code and string (optional).
112
+ (?P<message>.*) # The actual log message.
113
+ """
114
+
115
+ LOG_FORMAT_ACCESS_COMMON = LogFormat(
116
+ "common",
117
+ re.compile(
118
+ rf"{RE_REMOTE_PATTERN}\s{RE_ACCESS_COMMON_PATTERN}",
119
+ re.VERBOSE,
120
+ ),
121
+ )
122
+ LOG_FORMAT_ACCESS_VHOST_COMBINED = LogFormat(
123
+ "vhost_combined",
124
+ re.compile(
125
+ rf"""
126
+ (?P<server_name>.*?):(?P<port>.*)
127
+ \s
128
+ {RE_REMOTE_PATTERN}
129
+ \s
130
+ {RE_ACCESS_COMMON_PATTERN}
131
+ \s
132
+ {RE_REFERER_USER_AGENT_PATTERN}
133
+ """,
134
+ re.VERBOSE,
135
+ ),
136
+ )
137
+ LOG_FORMAT_ACCESS_COMBINED = LogFormat(
138
+ "combined",
139
+ re.compile(
140
+ rf"{RE_REMOTE_PATTERN}\s{RE_ACCESS_COMMON_PATTERN}\s{RE_REFERER_USER_AGENT_PATTERN}",
141
+ re.VERBOSE,
142
+ ),
143
+ )
144
+ LOG_FORMAT_ERROR_COMMON = LogFormat("error", re.compile(RE_ERROR_COMMON_PATTERN, re.VERBOSE))
145
+
146
+
147
+ def apache_response_time_to_ms(time_str: str) -> int:
148
+ """Convert a string containing amount and measurement (e.g. '10000 microsecs') to milliseconds."""
149
+ amount, _, measurement = time_str.partition(" ")
150
+ amount = int(amount)
151
+ if measurement == "microsecs":
152
+ return amount // 1000
153
+ raise ValueError(f"Could not parse {time_str}")
154
+
155
+
156
+ class ApachePlugin(WebserverPlugin):
50
157
  """Apache log parsing plugin.
51
158
 
52
- Apache has three default log formats, which this plugin can all parse automatically. These are::
159
+ Apache has three default access log formats, which this plugin can all parse automatically. These are::
160
+
53
161
  LogFormat "%v:%p %h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"" vhost_combined
54
162
  LogFormat "%h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"" combined
55
- LogFormat "%h %l %u %t \"%r\" %>s %O" common
163
+ LogFormat "%h %l %u %t \"%r\" %>s %O" common
56
164
 
57
165
  For the definitions of each format string, see https://httpd.apache.org/docs/2.4/mod/mod_log_config.html#formats
58
- """
166
+
167
+ For Apache, the error logs by default follow the following format::
168
+
169
+ ErrorLogFormat ``"[%{u}t] [%-m:%l] [pid %P:tid %T] %7F: %E: [client\ %a] %M% ,\ referer\ %{Referer}i"``
170
+ """ # noqa: E501, W605
59
171
 
60
172
  __namespace__ = "apache"
61
173
 
174
+ DEFAULT_LOG_DIRS = ["/var/log/apache2", "/var/log/apache", "/var/log/httpd", "/var/log"]
175
+ ACCESS_LOG_NAMES = ["access.log", "access_log", "httpd-access.log"]
176
+ ERROR_LOG_NAMES = ["error.log"]
177
+ DEFAULT_CONFIG_PATHS = [
178
+ "/etc/apache2/apache2.conf",
179
+ "/usr/local/etc/apache22/httpd.conf",
180
+ "/etc/httpd/conf/httpd.conf",
181
+ "/etc/httpd.conf",
182
+ ]
183
+
62
184
  def __init__(self, target: Target):
63
185
  super().__init__(target)
64
- self.log_paths = self.get_log_paths()
186
+ self.access_log_paths, self.error_log_paths = self.get_log_paths()
65
187
 
66
188
  def check_compatible(self) -> None:
67
- if not len(self.log_paths):
189
+ if not len(self.access_log_paths) and not len(self.error_log_paths):
68
190
  raise UnsupportedPluginError("No Apache directories found")
69
191
 
70
192
  @plugin.internal
71
- def get_log_paths(self) -> list[Path]:
193
+ def get_log_paths(self) -> tuple[list[Path], list[Path]]:
72
194
  """
73
195
  Discover any present Apache log paths on the target system.
74
196
 
@@ -77,83 +199,175 @@ class ApachePlugin(plugin.Plugin):
77
199
  - https://unix.stackexchange.com/a/269090
78
200
  """
79
201
 
80
- log_paths = []
202
+ access_log_paths = set()
203
+ error_log_paths = set()
81
204
 
82
205
  # Check if any well known default Apache log locations exist
83
- default_log_dirs = ["/var/log/apache2", "/var/log/apache", "/var/log/httpd", "/var/log"]
84
- default_log_names = ["access.log", "access_log", "httpd-access.log"]
85
- for log_dir, log_name in itertools.product(default_log_dirs, default_log_names):
86
- log_paths.extend(self.target.fs.path(log_dir).glob(log_name + "*"))
87
-
88
- # Check default Apache configs for their CustomLog directive
89
- default_config_paths = [
90
- "/etc/apache2/apache2.conf",
91
- "/usr/local/etc/apache22/httpd.conf",
92
- "/etc/httpd/conf/httpd.conf",
93
- ]
94
-
95
- for config in default_config_paths:
206
+ for log_dir, log_name in itertools.product(self.DEFAULT_LOG_DIRS, self.ACCESS_LOG_NAMES):
207
+ access_log_paths.update(self.target.fs.path(log_dir).glob(f"{log_name}*"))
208
+
209
+ for log_dir, log_name in itertools.product(self.DEFAULT_LOG_DIRS, self.ERROR_LOG_NAMES):
210
+ error_log_paths.update(self.target.fs.path(log_dir).glob(f"{log_name}*"))
211
+
212
+ # Check default Apache configs for CustomLog or ErrorLog directives
213
+ for config in self.DEFAULT_CONFIG_PATHS:
96
214
  if (path := self.target.fs.path(config)).exists():
97
215
  for line in path.open("rt"):
98
216
  line = line.strip()
99
217
 
100
- if not line or "CustomLog" not in line:
218
+ if not line or ("CustomLog" not in line and "ErrorLog" not in line):
101
219
  continue
102
220
 
103
- try:
104
- # CustomLog "/custom/log/location/access.log" common
105
- log_path = line.split("CustomLog")[1].strip().split(" ")[0].replace('"', "")
106
- custom_log = self.target.fs.path(log_path)
107
- log_paths.extend(
108
- path for path in custom_log.parent.glob(f"{custom_log.name}*") if path not in log_paths
109
- )
110
- except IndexError:
221
+ if "ErrorLog" in line:
222
+ set_to_update = error_log_paths
223
+ pattern_to_use = RE_CONFIG_ERRORLOG_DIRECTIVE
224
+ else:
225
+ set_to_update = access_log_paths
226
+ pattern_to_use = RE_CONFIG_CUSTOM_LOG_DIRECTIVE
227
+
228
+ match = pattern_to_use.match(line)
229
+ if not match:
111
230
  self.target.log.warning("Unexpected Apache log configuration: %s (%s)", line, path)
231
+ continue
232
+
233
+ directive = match.groupdict()
234
+ custom_log = self.target.fs.path(directive["location"])
235
+ set_to_update.update(path for path in custom_log.parent.glob(f"{custom_log.name}*"))
112
236
 
113
- return log_paths
237
+ return sorted(access_log_paths), sorted(error_log_paths)
114
238
 
115
239
  @plugin.export(record=WebserverAccessLogRecord)
116
240
  def access(self) -> Iterator[WebserverAccessLogRecord]:
117
- """Return contents of Apache access log files in unified WebserverAccessLogRecord format."""
118
- for path in self.log_paths:
241
+ """Return contents of Apache access log files in unified ``WebserverAccessLogRecord`` format."""
242
+ for line, path in self._iterate_log_lines(self.access_log_paths):
243
+ try:
244
+ logformat = self.infer_access_log_format(line)
245
+ if not logformat:
246
+ self.target.log.warning("Apache log format could not be inferred for log line: %s (%s)", line, path)
247
+ continue
248
+
249
+ match = logformat.pattern.match(line)
250
+ if not match:
251
+ self.target.log.warning(
252
+ "Could not match Apache log format %s for log line: %s (%s)", logformat.name, line, path
253
+ )
254
+ continue
255
+
256
+ log = match.groupdict()
257
+ if response_time := log.get("response_time"):
258
+ response_time = apache_response_time_to_ms(response_time)
259
+
260
+ yield WebserverAccessLogRecord(
261
+ ts=datetime.strptime(log["ts"], "%d/%b/%Y:%H:%M:%S %z"),
262
+ remote_user=log["remote_user"],
263
+ remote_ip=log["remote_ip"],
264
+ local_ip=log.get("local_ip"),
265
+ method=log["method"],
266
+ uri=log["uri"],
267
+ protocol=log["protocol"],
268
+ status_code=log["status_code"],
269
+ bytes_sent=log["bytes_sent"].strip("-") or 0,
270
+ pid=log.get("pid"),
271
+ referer=log.get("referer"),
272
+ useragent=log.get("useragent"),
273
+ response_time_ms=response_time,
274
+ source=path,
275
+ _target=self.target,
276
+ )
277
+
278
+ except Exception as e:
279
+ self.target.log.warning("An error occured parsing Apache log file %s: %s", path, str(e))
280
+ self.target.log.debug("", exc_info=e)
281
+
282
+ @plugin.export(record=WebserverErrorLogRecord)
283
+ def error(self) -> Iterator[WebserverErrorLogRecord]:
284
+ """Return contents of Apache error log files in unified ``WebserverErrorLogRecord`` format."""
285
+ for line, path in self._iterate_log_lines(self.error_log_paths):
286
+ try:
287
+ match = LOG_FORMAT_ERROR_COMMON.pattern.match(line)
288
+ if not match:
289
+ self.target.log.warning("Could not match Apache error log format for log line: %s (%s)", line, path)
290
+ continue
291
+
292
+ log = match.groupdict()
293
+ remote_ip = log.get("client")
294
+ if remote_ip and ":" in remote_ip:
295
+ remote_ip, _, port = remote_ip.rpartition(":")
296
+ error_source = log.get("error_source")
297
+ error_code = log.get("error_code")
298
+
299
+ # Both error_source and error_code follow the same logformat. When both are present, the error source
300
+ # goes before the client and the error code goes after. However, it is also possible that only the error
301
+ # code is available, in which case it is situated *after* the client. In such situations our regex match
302
+ # has assigned the variables wrong, and we need to do a swap.
303
+ if error_source and error_code is None:
304
+ error_source, error_code = error_code, error_source
305
+
306
+ # Unlike with access logs, ErrorLogFormat doesn't log the offset to UTC but instead logs in local time.
307
+ ts = self.target.datetime.local(datetime.strptime(log["ts"], "%a %b %d %H:%M:%S.%f %Y"))
308
+
309
+ yield WebserverErrorLogRecord(
310
+ ts=ts,
311
+ pid=log.get("pid"),
312
+ remote_ip=remote_ip,
313
+ module=log["module"],
314
+ level=log["level"],
315
+ error_source=error_source,
316
+ error_code=error_code,
317
+ message=log["message"],
318
+ source=path,
319
+ _target=self.target,
320
+ )
321
+
322
+ except Exception as e:
323
+ self.target.log.warning("An error occured parsing Apache log file %s: %s", path, str(e))
324
+ self.target.log.debug("", exc_info=e)
325
+
326
+ def _iterate_log_lines(self, paths: list[Path]) -> Iterator[tuple[str, Path]]:
327
+ """Iterate through a list of paths and yield tuples of loglines and the path of the file where they're from."""
328
+ for path in paths:
119
329
  try:
120
330
  path = path.resolve(strict=True)
121
331
  for line in open_decompress(path, "rt"):
122
332
  line = line.strip()
123
333
  if not line:
124
334
  continue
335
+ yield line, path
336
+ except FileNotFoundError:
337
+ self.target.log.warning("Apache log file configured but could not be found (dead symlink?): %s", path)
125
338
 
126
- fmt = infer_log_format(line)
127
- if not fmt:
128
- self.target.log.warning(
129
- "Apache log format could not be inferred for log line: %s (%s)", line, path
130
- )
131
- continue
339
+ @staticmethod
340
+ def infer_access_log_format(line: str) -> Optional[LogFormat]:
341
+ """Attempt to infer what standard LogFormat is used. Returns None if no known format can be inferred.
132
342
 
133
- match = fmt.value.match(line)
134
- if not match:
135
- self.target.log.warning(
136
- "Could not match Apache log format %s for log line: %s (%s)", fmt, line, path
137
- )
138
- continue
343
+ Three default log type examples from Apache (note that the ipv4 could also be ipv6)
139
344
 
140
- log = match.groupdict()
141
- yield WebserverAccessLogRecord(
142
- ts=datetime.strptime(log["ts"], "%d/%b/%Y:%H:%M:%S %z"),
143
- remote_user=log["remote_user"],
144
- remote_ip=log["remote_ip"],
145
- method=log["method"],
146
- uri=log["uri"],
147
- protocol=log["protocol"],
148
- status_code=log["status_code"],
149
- bytes_sent=log["bytes_sent"].strip("-") or 0,
150
- referer=log.get("referer"),
151
- useragent=log.get("useragent"),
152
- source=path,
153
- _target=self.target,
154
- )
155
- except FileNotFoundError:
156
- self.target.log.warning("Apache log file configured but could not be found (dead symlink?): %s", path)
157
- except Exception as e:
158
- self.target.log.warning("An error occured parsing Apache log file %s: %s", path, str(e))
159
- self.target.log.debug("", exc_info=e)
345
+
346
+ Combined::
347
+
348
+ 1.2.3.4 - - [19/Dec/2022:17:25:12 +0100] "GET / HTTP/1.1" 304 247 "-" "Mozilla/5.0
349
+ (Windows NT 10.0; Win64; x64); AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0
350
+ Safari/537.36\"
351
+
352
+ Common::
353
+
354
+ 1.2.3.4 - - [19/Dec/2022:17:25:40 +0100] "GET / HTTP/1.1" 200 312
355
+
356
+ vhost_combined::
357
+
358
+ example.com:80 1.2.3.4 - - [19/Dec/2022:17:25:40 +0100] "GET / HTTP/1.1" 200 312 "-"
359
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64); AppleWebKit/537.36 (KHTML, like Gecko)
360
+ Chrome/108.0.0.0 Safari/537.36\"
361
+ """
362
+ parts = line.split()
363
+ first_part = parts[0]
364
+ if ":" in first_part and "." in first_part:
365
+ # does not start with IP, hence it must be a vhost typed log
366
+ return LOG_FORMAT_ACCESS_VHOST_COMBINED
367
+ elif line[-1] == '"':
368
+ # ends with a quotation mark but does not contain a response time, meaning there is only a user agent
369
+ return LOG_FORMAT_ACCESS_COMBINED
370
+ elif line[-1].isdigit():
371
+ return LOG_FORMAT_ACCESS_COMMON
372
+
373
+ return None
@@ -9,7 +9,10 @@ from dissect.util.ts import from_unix
9
9
  from dissect.target import plugin
10
10
  from dissect.target.exceptions import FileNotFoundError, UnsupportedPluginError
11
11
  from dissect.target.helpers.fsutil import basename, open_decompress
12
- from dissect.target.plugins.apps.webserver.webserver import WebserverAccessLogRecord
12
+ from dissect.target.plugins.apps.webserver.webserver import (
13
+ WebserverAccessLogRecord,
14
+ WebserverPlugin,
15
+ )
13
16
  from dissect.target.target import Target
14
17
 
15
18
  LOG_FILE_REGEX = re.compile(r"(log|output file) (?P<log_file>.*)( \{)?$")
@@ -18,7 +21,7 @@ LOG_REGEX = re.compile(
18
21
  )
19
22
 
20
23
 
21
- class CaddyPlugin(plugin.Plugin):
24
+ class CaddyPlugin(WebserverPlugin):
22
25
  __namespace__ = "caddy"
23
26
 
24
27
  def __init__(self, target: Target):
@@ -0,0 +1,82 @@
1
+ import re
2
+
3
+ from dissect.target.exceptions import UnsupportedPluginError
4
+ from dissect.target.plugin import OperatingSystem
5
+ from dissect.target.plugins.apps.webserver.apache import (
6
+ RE_ACCESS_COMMON_PATTERN,
7
+ RE_REFERER_USER_AGENT_PATTERN,
8
+ RE_REMOTE_PATTERN,
9
+ RE_RESPONSE_TIME_PATTERN,
10
+ ApachePlugin,
11
+ LogFormat,
12
+ )
13
+
14
+ LOG_FORMAT_CITRIX_NETSCALER_ACCESS_COMBINED_RESPONSE_TIME = LogFormat(
15
+ "combined_resptime",
16
+ re.compile(
17
+ rf"""
18
+ {RE_REMOTE_PATTERN} # remote_ip, remote_logname, remote_user
19
+ \s
20
+ {RE_ACCESS_COMMON_PATTERN} # Timestamp, pid, method, uri, protocol, status code, bytes_sent
21
+ \s
22
+ {RE_REFERER_USER_AGENT_PATTERN} # Referer, user_agent
23
+ \s
24
+ {RE_RESPONSE_TIME_PATTERN} # Response time
25
+ """,
26
+ re.VERBOSE,
27
+ ),
28
+ )
29
+
30
+ LOG_FORMAT_CITRIX_NETSCALER_ACCESS_COMBINED_RESPONSE_TIME_WITH_HEADERS = LogFormat(
31
+ "combined_resptime_with_citrix_hdrs",
32
+ re.compile(
33
+ rf"""
34
+ (?P<remote_ip>.*?) # Client IP address of the request.
35
+ \s
36
+ ->
37
+ \s
38
+ (?P<local_ip>.*?) # Local IP of the Netscaler.
39
+ \s
40
+ (?P<remote_logname>.*?) # Remote logname (from identd, if supplied).
41
+ \s
42
+ (?P<remote_user>.*?) # Remote user if the request was authenticated.
43
+ \s
44
+ {RE_ACCESS_COMMON_PATTERN} # Timestamp, pid, method, uri, protocol, status code, bytes_sent
45
+ \s
46
+ {RE_REFERER_USER_AGENT_PATTERN} # Referer, user_agent
47
+ \s
48
+ {RE_RESPONSE_TIME_PATTERN} # Response time
49
+ """,
50
+ re.VERBOSE,
51
+ ),
52
+ )
53
+
54
+
55
+ class CitrixWebserverPlugin(ApachePlugin):
56
+ """Apache log parsing plugin for Citrix specific logs.
57
+
58
+ Citrix uses Apache with custom access log formats. These are::
59
+
60
+ LogFormat "%{Citrix-ns-orig-srcip}i -> %{Citrix-ns-orig-destip}i %l %u %t [%P] \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"Time: %D microsecs\"" combined_resptime_with_citrix_hdrs
61
+ LogFormat "%a %l %u %t [%P] \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"Time: %D microsecs\"" combined_resptime
62
+ """ # noqa: E501, W605
63
+
64
+ __namespace__ = "citrix"
65
+
66
+ ACCESS_LOG_NAMES = ApachePlugin.ACCESS_LOG_NAMES + ["httpaccess.log", "httpaccess-vpn.log"]
67
+ ERROR_LOG_NAMES = ApachePlugin.ERROR_LOG_NAMES + ["httperror.log", "httperror-vpn.log"]
68
+
69
+ def check_compatible(self) -> None:
70
+ if not self.target.os == OperatingSystem.CITRIX:
71
+ raise UnsupportedPluginError("Target is not a Citrix Netscaler")
72
+
73
+ @staticmethod
74
+ def infer_access_log_format(line: str) -> LogFormat:
75
+ splitted_line = line.split()
76
+ second_part = splitted_line[1]
77
+ if second_part == "->":
78
+ return LOG_FORMAT_CITRIX_NETSCALER_ACCESS_COMBINED_RESPONSE_TIME_WITH_HEADERS
79
+ if "Time: " in line:
80
+ return LOG_FORMAT_CITRIX_NETSCALER_ACCESS_COMBINED_RESPONSE_TIME
81
+
82
+ return ApachePlugin.infer_access_log_format(line)
@@ -11,7 +11,10 @@ from dissect.target import plugin
11
11
  from dissect.target.exceptions import FileNotFoundError as DissectFileNotFoundError
12
12
  from dissect.target.exceptions import PluginError, UnsupportedPluginError
13
13
  from dissect.target.helpers.record import TargetRecordDescriptor
14
- from dissect.target.plugins.apps.webserver.webserver import WebserverAccessLogRecord
14
+ from dissect.target.plugins.apps.webserver.webserver import (
15
+ WebserverAccessLogRecord,
16
+ WebserverPlugin,
17
+ )
15
18
 
16
19
  LOG_RECORD_NAME = "filesystem/windows/iis/logs"
17
20
 
@@ -41,7 +44,7 @@ BasicRecordDescriptor = TargetRecordDescriptor(LOG_RECORD_NAME, BASIC_RECORD_FIE
41
44
  FIELD_NAME_INVALID_CHARS_RE = re.compile(r"[^a-zA-Z0-9]")
42
45
 
43
46
 
44
- class IISLogsPlugin(plugin.Plugin):
47
+ class IISLogsPlugin(WebserverPlugin):
45
48
  """IIS 7 (and above) logs plugin.
46
49
 
47
50
  References:
@@ -6,7 +6,10 @@ from typing import Iterator
6
6
  from dissect.target import plugin
7
7
  from dissect.target.exceptions import FileNotFoundError, UnsupportedPluginError
8
8
  from dissect.target.helpers.fsutil import open_decompress
9
- from dissect.target.plugins.apps.webserver.webserver import WebserverAccessLogRecord
9
+ from dissect.target.plugins.apps.webserver.webserver import (
10
+ WebserverAccessLogRecord,
11
+ WebserverPlugin,
12
+ )
10
13
  from dissect.target.target import Target
11
14
 
12
15
  LOG_REGEX = re.compile(
@@ -14,7 +17,7 @@ LOG_REGEX = re.compile(
14
17
  )
15
18
 
16
19
 
17
- class NginxPlugin(plugin.Plugin):
20
+ class NginxPlugin(WebserverPlugin):
18
21
  __namespace__ = "nginx"
19
22
 
20
23
  def __init__(self, target: Target):
@@ -1,16 +1,16 @@
1
- from typing import Iterator
1
+ from typing import Iterator, Union
2
2
 
3
- from dissect.target.exceptions import UnsupportedPluginError
4
3
  from dissect.target.helpers.record import TargetRecordDescriptor
5
- from dissect.target.plugin import Plugin, export
6
- from dissect.target.target import Target
4
+ from dissect.target.plugin import NamespacePlugin, export
7
5
 
8
6
  WebserverAccessLogRecord = TargetRecordDescriptor(
9
- "application/log/webserver",
7
+ "application/log/webserver/access",
10
8
  [
11
9
  ("datetime", "ts"),
12
10
  ("string", "remote_user"),
13
11
  ("net.ipaddress", "remote_ip"),
12
+ ("net.ipaddress", "local_ip"),
13
+ ("varint", "pid"),
14
14
  ("string", "method"),
15
15
  ("uri", "uri"),
16
16
  ("string", "protocol"),
@@ -18,49 +18,33 @@ WebserverAccessLogRecord = TargetRecordDescriptor(
18
18
  ("varint", "bytes_sent"),
19
19
  ("uri", "referer"),
20
20
  ("string", "useragent"),
21
+ ("varint", "response_time_ms"),
21
22
  ("path", "source"),
22
23
  ],
23
24
  )
24
25
 
26
+ WebserverErrorLogRecord = TargetRecordDescriptor(
27
+ "application/log/webserver/error",
28
+ [
29
+ ("datetime", "ts"),
30
+ ("net.ipaddress", "remote_ip"),
31
+ ("varint", "pid"),
32
+ ("string", "module"),
33
+ ("string", "level"),
34
+ ("string", "error_source"),
35
+ ("string", "error_code"),
36
+ ("string", "message"),
37
+ ("path", "source"),
38
+ ],
39
+ )
25
40
 
26
- class WebserverPlugin(Plugin):
41
+
42
+ class WebserverPlugin(NamespacePlugin):
27
43
  __namespace__ = "webserver"
28
44
  __findable__ = False
29
45
 
30
- WEBSERVERS = [
31
- "apache",
32
- "nginx",
33
- "iis",
34
- "caddy",
35
- ]
36
-
37
- def __init__(self, target: Target):
38
- super().__init__(target)
39
- self._plugins = []
40
- for entry in self.WEBSERVERS:
41
- try:
42
- self._plugins.append(getattr(self.target, entry))
43
- except Exception: # noqa
44
- target.log.exception("Failed to load webserver plugin: %s", entry)
45
-
46
- def check_compatible(self) -> None:
47
- if not len(self._plugins):
48
- raise UnsupportedPluginError("No compatible webserver plugins found")
49
-
50
- def _func(self, f: str) -> Iterator[WebserverAccessLogRecord]:
51
- for p in self._plugins:
52
- try:
53
- yield from getattr(p, f)()
54
- except Exception:
55
- self.target.log.exception("Failed to execute webserver plugin: %s.%s", p._name, f)
56
-
57
- @export(record=WebserverAccessLogRecord)
58
- def logs(self) -> Iterator[WebserverAccessLogRecord]:
46
+ @export(record=[WebserverAccessLogRecord, WebserverErrorLogRecord])
47
+ def logs(self) -> Iterator[Union[WebserverAccessLogRecord, WebserverErrorLogRecord]]:
59
48
  """Returns log file records from installed webservers."""
60
49
  yield from self.access()
61
- # TODO: In the future we should add error logs too.
62
-
63
- @export(record=WebserverAccessLogRecord)
64
- def access(self) -> Iterator[WebserverAccessLogRecord]:
65
- """Returns WebserverAccessLogRecord records from installed webservers."""
66
- yield from self._func("access")
50
+ yield from self.error()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dissect.target
3
- Version: 3.15.dev23
3
+ Version: 3.15.dev25
4
4
  Summary: This module ties all other Dissect modules together, it provides a programming API and command line tools which allow easy access to various data sources inside disk images or file collections (a.k.a. targets)
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -3,7 +3,7 @@ dissect/target/container.py,sha256=9ixufT1_0WhraqttBWwQjG80caToJqvCX8VjFk8d5F0,9
3
3
  dissect/target/exceptions.py,sha256=VVW_Rq_vQinapz-2mbJ3UkxBEZpb2pE_7JlhMukdtrY,2877
4
4
  dissect/target/filesystem.py,sha256=r7JxYP1oI6fy6F29-7FCZZkldnn516d5_XQ7QhQHnH4,53765
5
5
  dissect/target/loader.py,sha256=0-LcZNi7S0qsXR7XGtrzxpuCh9BsLcqNR1T15O7SnBM,7257
6
- dissect/target/plugin.py,sha256=MyBjC7uJ-qml9SQMQ6xsNMdudsOFNJiHNMRn-AFi_pM,47405
6
+ dissect/target/plugin.py,sha256=vEk-jZdhPKhD7rxRuWGb9XAjHRXewWjflC03qOIF3rI,48113
7
7
  dissect/target/report.py,sha256=06uiP4MbNI8cWMVrC1SasNS-Yg6ptjVjckwj8Yhe0Js,7958
8
8
  dissect/target/target.py,sha256=CuqLTD3fwr4HIxtDgN_fwJ3UHSqe5PhNJlLTVGsluB8,31908
9
9
  dissect/target/volume.py,sha256=aQZAJiny8jjwkc9UtwIRwy7nINXjCxwpO-_UDfh6-BA,15801
@@ -136,11 +136,12 @@ dissect/target/plugins/apps/vpn/wireguard.py,sha256=45WvCqQQGrG3DVDH5ghcsGpM_Bom
136
136
  dissect/target/plugins/apps/webhosting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
137
  dissect/target/plugins/apps/webhosting/cpanel.py,sha256=OeFQnu9GmpffIlFyK-AR2Qf8tjyMhazWEAUyccDU5y0,2979
138
138
  dissect/target/plugins/apps/webserver/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
- dissect/target/plugins/apps/webserver/apache.py,sha256=_dZVlQMo5bngVkuQorz6QgjWxDOSLgHm2bpUqVcD2js,7182
140
- dissect/target/plugins/apps/webserver/caddy.py,sha256=Ik0mXNUpYg3dMA5M_8n5zjaFhNd-tYhM7gRPaf9ASa0,6390
141
- dissect/target/plugins/apps/webserver/iis.py,sha256=BVYwLn25n5wjX5X5uedmJOFE7CNae59twUeHuXYkPZc,14637
142
- dissect/target/plugins/apps/webserver/nginx.py,sha256=nO-YGx68Hw_meSqp2u_d8hQB7NKsx2d09b-H9ngrUKo,4097
143
- dissect/target/plugins/apps/webserver/webserver.py,sha256=_rkI0FRF4b3cUqSix_c00NoPYCfc6_GErt72sP2Jngk,2156
139
+ dissect/target/plugins/apps/webserver/apache.py,sha256=H38Zj41EkfS27x98gBTuPHJmTOmlhfMK73PX6zQ4YOY,14933
140
+ dissect/target/plugins/apps/webserver/caddy.py,sha256=qZsAK_tILGvroV4SWkDKc-Otwd41bUEtv9H9TuHmt-0,6422
141
+ dissect/target/plugins/apps/webserver/citrix.py,sha256=FEPdBteEJeeGg3B95W_27O9wLJVhenEc5A5fSLDmK18,3044
142
+ dissect/target/plugins/apps/webserver/iis.py,sha256=UwRVzLqnKScijdLoZFfpkSUzKTQosicZpn16q__4QBU,14669
143
+ dissect/target/plugins/apps/webserver/nginx.py,sha256=WA5soi1FU1c44oHRcyOoHK3gH8Jzc_Qi5uXcimDYukw,4129
144
+ dissect/target/plugins/apps/webserver/webserver.py,sha256=a7a2lLrhsa9c1AXnwiLP-tqVv-IUbmaVaSZI5S0fKa8,1500
144
145
  dissect/target/plugins/child/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
145
146
  dissect/target/plugins/child/esxi.py,sha256=GfgQzxntcHcyxAE2QjMJ-TrFhklweSXLbYh0uuv-klg,693
146
147
  dissect/target/plugins/child/hyperv.py,sha256=R2qVeu4p_9V53jO-65znN0LwX9v3FVA-9jbbtOQcEz8,2236
@@ -317,10 +318,10 @@ dissect/target/volumes/luks.py,sha256=OmCMsw6rCUXG1_plnLVLTpsvE1n_6WtoRUGQbpmu1z
317
318
  dissect/target/volumes/lvm.py,sha256=wwQVR9I3G9YzmY6UxFsH2Y4MXGBcKL9aayWGCDTiWMU,2269
318
319
  dissect/target/volumes/md.py,sha256=j1K1iKmspl0C_OJFc7-Q1BMWN2OCC5EVANIgVlJ_fIE,1673
319
320
  dissect/target/volumes/vmfs.py,sha256=-LoUbn9WNwTtLi_4K34uV_-wDw2W5hgaqxZNj4UmqAQ,1730
320
- dissect.target-3.15.dev23.dist-info/COPYRIGHT,sha256=m-9ih2RVhMiXHI2bf_oNSSgHgkeIvaYRVfKTwFbnJPA,301
321
- dissect.target-3.15.dev23.dist-info/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
322
- dissect.target-3.15.dev23.dist-info/METADATA,sha256=fDcJUO3AZyaTwQKqSs910EeZLpf8wLhykMH8u3Kff6Y,11113
323
- dissect.target-3.15.dev23.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
324
- dissect.target-3.15.dev23.dist-info/entry_points.txt,sha256=tvFPa-Ap-gakjaPwRc6Fl6mxHzxEZ_arAVU-IUYeo_s,447
325
- dissect.target-3.15.dev23.dist-info/top_level.txt,sha256=Mn-CQzEYsAbkxrUI0TnplHuXnGVKzxpDw_po_sXpvv4,8
326
- dissect.target-3.15.dev23.dist-info/RECORD,,
321
+ dissect.target-3.15.dev25.dist-info/COPYRIGHT,sha256=m-9ih2RVhMiXHI2bf_oNSSgHgkeIvaYRVfKTwFbnJPA,301
322
+ dissect.target-3.15.dev25.dist-info/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
323
+ dissect.target-3.15.dev25.dist-info/METADATA,sha256=EhglYTVaAYzVR_mUdsZKJJDV0Rcmwzsw9D_1mw9u5mg,11113
324
+ dissect.target-3.15.dev25.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
325
+ dissect.target-3.15.dev25.dist-info/entry_points.txt,sha256=tvFPa-Ap-gakjaPwRc6Fl6mxHzxEZ_arAVU-IUYeo_s,447
326
+ dissect.target-3.15.dev25.dist-info/top_level.txt,sha256=Mn-CQzEYsAbkxrUI0TnplHuXnGVKzxpDw_po_sXpvv4,8
327
+ dissect.target-3.15.dev25.dist-info/RECORD,,