@appland/scanner 1.68.0 → 1.69.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ # [@appland/scanner-v1.69.0](https://github.com/applandinc/appmap-js/compare/@appland/scanner-v1.68.0...@appland/scanner-v1.69.0) (2022-08-23)
2
+
3
+
4
+ ### Features
5
+
6
+ * Track specific untrusted data in unsafe deserialization rule ([d14fd4f](https://github.com/applandinc/appmap-js/commit/d14fd4f65fcbabfebdaf0d10dcae71dc563bc1fa))
7
+
1
8
  # [@appland/scanner-v1.68.0](https://github.com/applandinc/appmap-js/compare/@appland/scanner-v1.67.0...@appland/scanner-v1.68.0) (2022-08-19)
2
9
 
3
10
 
@@ -3,46 +3,62 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- const models_1 = require("@appland/models");
7
6
  const url_1 = require("url");
8
7
  const parseRuleDescription_1 = __importDefault(require("./lib/parseRuleDescription"));
9
- const precedingEvents_1 = __importDefault(require("./lib/precedingEvents"));
10
- const sanitizesData_1 = __importDefault(require("./lib/sanitizesData"));
11
- function allArgumentsSanitized(rootEvent, event) {
12
- return (event.parameters || [])
13
- .filter((parameter) => parameter.object_id)
14
- .every((parameter) => {
15
- for (const candidate of (0, precedingEvents_1.default)(rootEvent, event)) {
16
- if ((0, sanitizesData_1.default)(candidate.event, parameter.object_id, DeserializeSanitize)) {
17
- return true;
18
- }
19
- }
20
- return false;
21
- });
8
+ const analyzeDataFlow_1 = __importDefault(require("./lib/analyzeDataFlow"));
9
+ function valueHistory(value) {
10
+ const events = [];
11
+ const queue = [value];
12
+ for (;;) {
13
+ const current = queue.shift();
14
+ if (!current)
15
+ break;
16
+ const { origin, parents } = current;
17
+ if (!events.includes(origin))
18
+ events.push(origin);
19
+ queue.push(...parents);
20
+ }
21
+ return events;
22
+ }
23
+ function wasSanitized(value) {
24
+ return valueHistory(value).some(({ labels }) => labels.has(DeserializeSanitize));
22
25
  }
23
- function build() {
24
- function matcher(rootEvent) {
25
- for (const event of new models_1.EventNavigator(rootEvent).descendants()) {
26
- // events: //*[@authorization && truthy?(returnValue) && not(preceding::*[@authentication]) && not(descendant::*[@authentication])]
27
- if (event.event.labels.has(DeserializeUnsafe) &&
28
- !event.event.ancestors().find((ancestor) => ancestor.labels.has(DeserializeSafe))) {
29
- if (allArgumentsSanitized(rootEvent, event.event)) {
30
- return;
31
- }
32
- else {
33
- return [
34
- {
35
- event: event.event,
36
- message: `${event.event} deserializes untrusted data`,
37
- },
38
- ];
39
- }
26
+ function formatHistories(values) {
27
+ const histories = values.map(valueHistory);
28
+ return Object.fromEntries(histories.flatMap((history, input) => history.map((event, idx) => [`origin[${input}][${idx}]`, event])));
29
+ }
30
+ function label(name) {
31
+ return ({ labels }) => labels.has(name);
32
+ }
33
+ function matcher(startEvent) {
34
+ const flow = (0, analyzeDataFlow_1.default)([...(startEvent.message || [])], startEvent);
35
+ const results = [];
36
+ const sanitizedValues = new Set();
37
+ for (const [event, values] of flow) {
38
+ if (event.labels.has(DeserializeSanitize)) {
39
+ for (const v of values)
40
+ sanitizedValues.add(v);
41
+ continue;
42
+ }
43
+ if (!event.labels.has(DeserializeUnsafe))
44
+ continue;
45
+ const unsanitized = new Set(values.filter((v) => !(wasSanitized(v) || sanitizedValues.has(v))));
46
+ // remove any that have been passed into a safe deserialization function
47
+ for (const ancestor of event.ancestors().filter(label(DeserializeSafe))) {
48
+ for (const v of flow.get(ancestor) || []) {
49
+ unsanitized.delete(v);
40
50
  }
41
51
  }
52
+ const remaining = [...unsanitized];
53
+ if (remaining.length === 0)
54
+ continue;
55
+ results.push({
56
+ event: event,
57
+ message: `deserializes untrusted data: ${remaining.map(({ value: { value } }) => value)}`,
58
+ participatingEvents: formatHistories(remaining),
59
+ });
42
60
  }
43
- return {
44
- matcher,
45
- };
61
+ return results;
46
62
  }
47
63
  const DeserializeUnsafe = 'deserialize.unsafe';
48
64
  const DeserializeSafe = 'deserialize.safe';
@@ -53,11 +69,12 @@ exports.default = {
53
69
  labels: [DeserializeUnsafe, DeserializeSafe, DeserializeSanitize],
54
70
  impactDomain: 'Security',
55
71
  enumerateScope: false,
72
+ scope: 'http_server_request',
56
73
  references: {
57
74
  'CWE-502': new url_1.URL('https://cwe.mitre.org/data/definitions/502.html'),
58
75
  'Ruby Security': new url_1.URL('https://docs.ruby-lang.org/en/3.0/doc/security_rdoc.html'),
59
76
  },
60
77
  description: (0, parseRuleDescription_1.default)('deserializationOfUntrustedData'),
61
78
  url: 'https://appland.com/docs/analysis/rules-reference.html#deserialization-of-untrusted-data',
62
- build,
79
+ build: () => ({ matcher }),
63
80
  };
@@ -0,0 +1,80 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ function matches(template, value) {
4
+ if (template.object_id && template.object_id === value.object_id)
5
+ return true;
6
+ if (template.value && template.value === value.value)
7
+ return true;
8
+ return false;
9
+ }
10
+ class Matcher {
11
+ constructor(root, data) {
12
+ this.tracked = new Map();
13
+ for (const value of data)
14
+ this.add(value, root, []);
15
+ }
16
+ add(value, origin, parents) {
17
+ if (isPrimitive(value))
18
+ return;
19
+ this.tracked.set(value, { value, origin, parents });
20
+ }
21
+ match(value) {
22
+ if (isPrimitive(value))
23
+ return null;
24
+ for (const [probe, history] of this.tracked) {
25
+ if (matches(probe, value))
26
+ return history;
27
+ }
28
+ return null;
29
+ }
30
+ matches(values) {
31
+ return compact(values.map(this.match.bind(this)));
32
+ }
33
+ }
34
+ function isPrimitive(value) {
35
+ // we don't want to record any nulls,
36
+ // booleans or small strings and numbers
37
+ return !value.value || value.value.length < 6;
38
+ }
39
+ function isNotNullOrUndefined(x) {
40
+ return x !== undefined && x !== null;
41
+ }
42
+ function compact(x) {
43
+ return x.filter(isNotNullOrUndefined);
44
+ }
45
+ /**
46
+ * Tracks flow of data across the execution trace, identifying all function
47
+ * calls which have a tracked object as its receiver or one of the parameters.
48
+ * Any value such a function returns will also then become tracked.
49
+ * The origin chain of all values is recorded, so full provenience up to
50
+ * the starting set can be reconstructed.
51
+ * @param trackedData Initial data to track.
52
+ * @param startEvent The root event of the analysis.
53
+ * @returns Events which have a tracked piece of data as an input, each
54
+ * associated with the list of such inputs.
55
+ */
56
+ function analyzeDataFlow(trackedData, startEvent) {
57
+ const matcher = new Matcher(startEvent, trackedData);
58
+ const events = new Map([
59
+ [startEvent, matcher.matches(trackedData)],
60
+ ]);
61
+ startEvent.traverse({
62
+ onEnter(event) {
63
+ const inputs = compact([...(event.parameters || []), event.receiver]);
64
+ const matches = matcher.matches(inputs);
65
+ if (matches.length === 0)
66
+ return;
67
+ events.set(event, matches);
68
+ },
69
+ onExit({ callEvent, returnValue }) {
70
+ if (!returnValue)
71
+ return;
72
+ const parents = events.get(callEvent);
73
+ if (!parents)
74
+ return;
75
+ matcher.add(returnValue, callEvent, parents);
76
+ },
77
+ });
78
+ return events;
79
+ }
80
+ exports.default = analyzeDataFlow;
@@ -101,7 +101,7 @@ exports.parseValue = parseValue;
101
101
  const isTruthy = (valueObj) => !isFalsey(valueObj);
102
102
  exports.isTruthy = isTruthy;
103
103
  function providesAuthentication(event, label) {
104
- return event.returnValue && event.labels.has(label) && isTruthy(event.returnValue);
104
+ return !!event.returnValue && event.labels.has(label) && isTruthy(event.returnValue);
105
105
  }
106
106
  exports.providesAuthentication = providesAuthentication;
107
107
  function ideLink(filePath, ide, eventId) {
@@ -10,18 +10,35 @@ labels:
10
10
  - deserialize.unsafe
11
11
  - deserialize.safe
12
12
  - deserialize.sanitize
13
+ scope: http_server_request
13
14
  ---
14
15
 
15
16
  Finds occurrences of deserialization in which the mechanism employed is known to be unsafe, and the
16
- data is not known to be trusted.
17
+ data comes from an untrusted source and hasn't passed through a sanitization mechanism.
17
18
 
18
19
  ### Rule logic
19
20
 
20
- Finds all events labeled `deserialize.unsafe`, that are not a descendant of an event labeled
21
- `deserialize.safe`. For each of these events, all event parameters are checked.
21
+ Finds all events labeled `deserialize.unsafe` that receive tainted data (as
22
+ determined by object identity or string value) as an input.
22
23
 
23
- Each parameter whose type is `string` or `object` is verified to ensure that it's trusted. For data
24
- to be trusted, it must be the return value of a function labeled `deserialize.sanitize`.
24
+ For each of these events, checks whether all the inputs have been sanitized.
25
+
26
+ Data that has been passed to a function labeled `deserialize.sanitize` is
27
+ assumed to be sanitized from this point onwards. Such a function could either
28
+ check the value is sanitized (note no verification is currently done to ensure
29
+ this result is checked) or return the transformed value after any necessary sanitization.
30
+
31
+ Data passed to a function labeled `deserialize.safe` is considered safe in all
32
+ functions called by it (down the callstack). Functions that first sanitize data
33
+ and then use an unsafe deserialization function should carry this label.
34
+
35
+ The set of tracked tainted data initially includes the HTTP message parameters
36
+ and is expanded to include any non-primitive (i.e. longer than 5 characters)
37
+ observed outputs of functions that consume tainted data.
38
+
39
+ The reliability of this rule now depends on completeness of the AppMap.
40
+ If there is a data transformation that is not captured it's invisible to the
41
+ rule and will result in failure to associate it with the tracked untrusted data.
25
42
 
26
43
  ### Notes
27
44
 
@@ -30,9 +47,12 @@ that executes code shortly after deserialization.
30
47
 
31
48
  ### Resolution
32
49
 
33
- If you can guarantee that you are using unsafe deserialization in a safe way, but it's not possible
34
- to obtain the raw data from a function labeled `deserialize.sanitize`, you can wrap the
35
- deserialization in a function labeled `deserialize.safe`.
50
+ Consider if the library you're using offers a safe deserialization function variant that you can
51
+ use instead. Using unsafe functions is only rarely needed and typically requires a good reason.
52
+
53
+ If you need to use the unsafe function, make sure you're able to handle unexpected input safely.
54
+ Sanitize the data thoroughly first; label the sanitization function with the `deserialize.sanitize` label
55
+ or wrap the whole sanitization and deserialization logic in a function labeled `deserialize.safe`.
36
56
 
37
57
  If you need to deserialize untrusted data, JSON is often a good choice as it is only capable of
38
58
  returning ‘primitive’ types such as strings, arrays, hashes, numbers and nil. If you need to
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@appland/scanner",
3
- "version": "1.68.0",
3
+ "version": "1.69.0",
4
4
  "description": "",
5
5
  "bin": "built/cli.js",
6
6
  "files": [