npm - @appland/scanner - Versions diffs - 1.68.0 → 1.69.0 - Mend

@appland/scanner 1.68.0 → 1.69.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/CHANGELOG.md +7 -0
package/built/rules/deserializationOfUntrustedData.js +52 -35
package/built/rules/lib/analyzeDataFlow.js +80 -0
package/built/rules/lib/util.js +1 -1
package/doc/rules/deserialization-of-untrusted-data.md +28 -8
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,10 @@
+# [@appland/scanner-v1.69.0](https://github.com/applandinc/appmap-js/compare/@appland/scanner-v1.68.0...@appland/scanner-v1.69.0) (2022-08-23)
+### Features
+* Track specific untrusted data in unsafe deserialization rule ([d14fd4f](https://github.com/applandinc/appmap-js/commit/d14fd4f65fcbabfebdaf0d10dcae71dc563bc1fa))
 # [@appland/scanner-v1.68.0](https://github.com/applandinc/appmap-js/compare/@appland/scanner-v1.67.0...@appland/scanner-v1.68.0) (2022-08-19)

package/built/rules/deserializationOfUntrustedData.js CHANGED Viewed

@@ -3,46 +3,62 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-const models_1 = require("@appland/models");
 const url_1 = require("url");
 const parseRuleDescription_1 = __importDefault(require("./lib/parseRuleDescription"));
-const precedingEvents_1 = __importDefault(require("./lib/precedingEvents"));
-const sanitizesData_1 = __importDefault(require("./lib/sanitizesData"));
-function allArgumentsSanitized(rootEvent, event) {
-    return (event.parameters || [])
-        .filter((parameter) => parameter.object_id)
-        .every((parameter) => {
-        for (const candidate of (0, precedingEvents_1.default)(rootEvent, event)) {
-            if ((0, sanitizesData_1.default)(candidate.event, parameter.object_id, DeserializeSanitize)) {
-                return true;
-            }
-        }
-        return false;
-    });
+const analyzeDataFlow_1 = __importDefault(require("./lib/analyzeDataFlow"));
+function valueHistory(value) {
+    const events = [];
+    const queue = [value];
+    for (;;) {
+        const current = queue.shift();
+        if (!current)
+            break;
+        const { origin, parents } = current;
+        if (!events.includes(origin))
+            events.push(origin);
+        queue.push(...parents);
+    }
+    return events;
+}
+function wasSanitized(value) {
+    return valueHistory(value).some(({ labels }) => labels.has(DeserializeSanitize));
 }
-function build() {
-    function matcher(rootEvent) {
-        for (const event of new models_1.EventNavigator(rootEvent).descendants()) {
-            // events: //*[@authorization && truthy?(returnValue) && not(preceding::*[@authentication]) && not(descendant::*[@authentication])]
-            if (event.event.labels.has(DeserializeUnsafe) &&
-                !event.event.ancestors().find((ancestor) => ancestor.labels.has(DeserializeSafe))) {
-                if (allArgumentsSanitized(rootEvent, event.event)) {
-                    return;
-                }
-                else {
-                    return [
-                        {
-                            event: event.event,
-                            message: `${event.event} deserializes untrusted data`,
-                        },
-                    ];
-                }
+function formatHistories(values) {
+    const histories = values.map(valueHistory);
+    return Object.fromEntries(histories.flatMap((history, input) => history.map((event, idx) => [`origin[${input}][${idx}]`, event])));
+}
+function label(name) {
+    return ({ labels }) => labels.has(name);
+}
+function matcher(startEvent) {
+    const flow = (0, analyzeDataFlow_1.default)([...(startEvent.message || [])], startEvent);
+    const results = [];
+    const sanitizedValues = new Set();
+    for (const [event, values] of flow) {
+        if (event.labels.has(DeserializeSanitize)) {
+            for (const v of values)
+                sanitizedValues.add(v);
+            continue;
+        }
+        if (!event.labels.has(DeserializeUnsafe))
+            continue;
+        const unsanitized = new Set(values.filter((v) => !(wasSanitized(v) || sanitizedValues.has(v))));
+        // remove any that have been passed into a safe deserialization function
+        for (const ancestor of event.ancestors().filter(label(DeserializeSafe))) {
+            for (const v of flow.get(ancestor) || []) {
+                unsanitized.delete(v);
             }
         }
+        const remaining = [...unsanitized];
+        if (remaining.length === 0)
+            continue;
+        results.push({
+            event: event,
+            message: `deserializes untrusted data: ${remaining.map(({ value: { value } }) => value)}`,
+            participatingEvents: formatHistories(remaining),
+        });
     }
-    return {
-        matcher,
-    };
+    return results;
 }
 const DeserializeUnsafe = 'deserialize.unsafe';
 const DeserializeSafe = 'deserialize.safe';
@@ -53,11 +69,12 @@ exports.default = {
     labels: [DeserializeUnsafe, DeserializeSafe, DeserializeSanitize],
     impactDomain: 'Security',
     enumerateScope: false,
+    scope: 'http_server_request',
     references: {
         'CWE-502': new url_1.URL('https://cwe.mitre.org/data/definitions/502.html'),
         'Ruby Security': new url_1.URL('https://docs.ruby-lang.org/en/3.0/doc/security_rdoc.html'),
     },
     description: (0, parseRuleDescription_1.default)('deserializationOfUntrustedData'),
     url: 'https://appland.com/docs/analysis/rules-reference.html#deserialization-of-untrusted-data',
-    build,
+    build: () => ({ matcher }),
 };

package/built/rules/lib/analyzeDataFlow.js ADDED Viewed

@@ -0,0 +1,80 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+function matches(template, value) {
+    if (template.object_id && template.object_id === value.object_id)
+        return true;
+    if (template.value && template.value === value.value)
+        return true;
+    return false;
+}
+class Matcher {
+    constructor(root, data) {
+        this.tracked = new Map();
+        for (const value of data)
+            this.add(value, root, []);
+    }
+    add(value, origin, parents) {
+        if (isPrimitive(value))
+            return;
+        this.tracked.set(value, { value, origin, parents });
+    }
+    match(value) {
+        if (isPrimitive(value))
+            return null;
+        for (const [probe, history] of this.tracked) {
+            if (matches(probe, value))
+                return history;
+        }
+        return null;
+    }
+    matches(values) {
+        return compact(values.map(this.match.bind(this)));
+    }
+}
+function isPrimitive(value) {
+    // we don't want to record any nulls,
+    // booleans or small strings and numbers
+    return !value.value || value.value.length < 6;
+}
+function isNotNullOrUndefined(x) {
+    return x !== undefined && x !== null;
+}
+function compact(x) {
+    return x.filter(isNotNullOrUndefined);
+}
+/**
+ * Tracks flow of data across the execution trace, identifying all function
+ * calls which have a tracked object as its receiver or one of the parameters.
+ * Any value such a function returns will also then become tracked.
+ * The origin chain of all values is recorded, so full provenience up to
+ * the starting set can be reconstructed.
+ * @param trackedData Initial data to track.
+ * @param startEvent The root event of the analysis.
+ * @returns Events which have a tracked piece of data as an input, each
+ * associated with the list of such inputs.
+ */
+function analyzeDataFlow(trackedData, startEvent) {
+    const matcher = new Matcher(startEvent, trackedData);
+    const events = new Map([
+        [startEvent, matcher.matches(trackedData)],
+    ]);
+    startEvent.traverse({
+        onEnter(event) {
+            const inputs = compact([...(event.parameters || []), event.receiver]);
+            const matches = matcher.matches(inputs);
+            if (matches.length === 0)
+                return;
+            events.set(event, matches);
+        },
+        onExit({ callEvent, returnValue }) {
+            if (!returnValue)
+                return;
+            const parents = events.get(callEvent);
+            if (!parents)
+                return;
+            matcher.add(returnValue, callEvent, parents);
+        },
+    });
+    return events;
+}
+exports.default = analyzeDataFlow;

package/built/rules/lib/util.js CHANGED Viewed

@@ -101,7 +101,7 @@ exports.parseValue = parseValue;
 const isTruthy = (valueObj) => !isFalsey(valueObj);
 exports.isTruthy = isTruthy;
 function providesAuthentication(event, label) {
-    return event.returnValue && event.labels.has(label) && isTruthy(event.returnValue);
+    return !!event.returnValue && event.labels.has(label) && isTruthy(event.returnValue);
 }
 exports.providesAuthentication = providesAuthentication;
 function ideLink(filePath, ide, eventId) {

package/doc/rules/deserialization-of-untrusted-data.md CHANGED Viewed

@@ -10,18 +10,35 @@ labels:
   - deserialize.unsafe
   - deserialize.safe
   - deserialize.sanitize
+scope: http_server_request
 ---
 Finds occurrences of deserialization in which the mechanism employed is known to be unsafe, and the
-data is not known to be trusted.
+data comes from an untrusted source and hasn't passed through a sanitization mechanism.
 ### Rule logic
-Finds all events labeled `deserialize.unsafe`, that are not a descendant of an event labeled
-`deserialize.safe`. For each of these events, all event parameters are checked.
+Finds all events labeled `deserialize.unsafe` that receive tainted data (as
+determined by object identity or string value) as an input.
-Each parameter whose type is `string` or `object` is verified to ensure that it's trusted. For data
-to be trusted, it must be the return value of a function labeled `deserialize.sanitize`.
+For each of these events, checks whether all the inputs have been sanitized.
+Data that has been passed to a function labeled `deserialize.sanitize` is
+assumed to be sanitized from this point onwards. Such a function could either
+check the value is sanitized (note no verification is currently done to ensure
+this result is checked) or return the transformed value after any necessary sanitization.
+Data passed to a function labeled `deserialize.safe` is considered safe in all
+functions called by it (down the call stack). Functions that first sanitize data
+and then use an unsafe deserialization function should carry this label.
+The set of tracked tainted data initially includes the HTTP message parameters
+and is expanded to include any non-primitive (i.e. longer than 5 characters)
+observed outputs of functions that consume tainted data.
+The reliability of this rule now depends on completeness of the AppMap.
+If there is a data transformation that is not captured, it's invisible to the
+rule, which will then fail to associate the transformed data with the tracked untrusted data.
 ### Notes
@@ -30,9 +47,12 @@ that executes code shortly after deserialization.
 ### Resolution
-If you can guarantee that you are using unsafe deserialization in a safe way, but it's not possible
-to obtain the raw data from a function labeled `deserialize.sanitize`, you can wrap the
-deserialization in a function labeled `deserialize.safe`.
+Consider if the library you're using offers a safe deserialization function variant that you can
+use instead. Using unsafe functions is only rarely needed and typically requires a good reason.
+If you need to use the unsafe function, make sure you're able to handle unexpected input safely.
+Sanitize the data thoroughly first; label the sanitization function with `deserialize.sanitize`
+or wrap the whole sanitization and deserialization logic in a function labeled `deserialize.safe`.
 If you need to deserialize untrusted data, JSON is often a good choice as it is only capable of
 returning ‘primitive’ types such as strings, arrays, hashes, numbers and nil. If you need to

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@appland/scanner",
-  "version": "1.68.0",
+  "version": "1.69.0",
   "description": "",
   "bin": "built/cli.js",
   "files": [