@datafog/fogclaw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/.github/workflows/harness-docs.yml +30 -0
  2. package/AGENTS.md +28 -0
  3. package/LICENSE +21 -0
  4. package/README.md +208 -0
  5. package/dist/config.d.ts +4 -0
  6. package/dist/config.d.ts.map +1 -0
  7. package/dist/config.js +30 -0
  8. package/dist/config.js.map +1 -0
  9. package/dist/engines/gliner.d.ts +14 -0
  10. package/dist/engines/gliner.d.ts.map +1 -0
  11. package/dist/engines/gliner.js +75 -0
  12. package/dist/engines/gliner.js.map +1 -0
  13. package/dist/engines/regex.d.ts +5 -0
  14. package/dist/engines/regex.d.ts.map +1 -0
  15. package/dist/engines/regex.js +54 -0
  16. package/dist/engines/regex.js.map +1 -0
  17. package/dist/index.d.ts +19 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +157 -0
  20. package/dist/index.js.map +1 -0
  21. package/dist/redactor.d.ts +3 -0
  22. package/dist/redactor.d.ts.map +1 -0
  23. package/dist/redactor.js +37 -0
  24. package/dist/redactor.js.map +1 -0
  25. package/dist/scanner.d.ts +11 -0
  26. package/dist/scanner.d.ts.map +1 -0
  27. package/dist/scanner.js +77 -0
  28. package/dist/scanner.js.map +1 -0
  29. package/dist/types.d.ts +31 -0
  30. package/dist/types.d.ts.map +1 -0
  31. package/dist/types.js +18 -0
  32. package/dist/types.js.map +1 -0
  33. package/docs/DATA.md +28 -0
  34. package/docs/DESIGN.md +17 -0
  35. package/docs/DOMAIN_DOCS.md +30 -0
  36. package/docs/FRONTEND.md +24 -0
  37. package/docs/OBSERVABILITY.md +25 -0
  38. package/docs/PLANS.md +171 -0
  39. package/docs/PRODUCT_SENSE.md +20 -0
  40. package/docs/RELIABILITY.md +60 -0
  41. package/docs/SECURITY.md +50 -0
  42. package/docs/design-docs/core-beliefs.md +17 -0
  43. package/docs/design-docs/index.md +8 -0
  44. package/docs/generated/README.md +36 -0
  45. package/docs/generated/memory.md +1 -0
  46. package/docs/plans/2026-02-16-fogclaw-design.md +172 -0
  47. package/docs/plans/2026-02-16-fogclaw-implementation.md +1606 -0
  48. package/docs/plans/README.md +15 -0
  49. package/docs/plans/active/2026-02-16-feat-openclaw-official-submission-plan.md +386 -0
  50. package/docs/plans/active/2026-02-17-feat-release-fogclaw-via-datafog-package-plan.md +318 -0
  51. package/docs/plans/active/2026-02-17-feat-submit-fogclaw-to-openclaw-plan.md +244 -0
  52. package/docs/plans/tech-debt-tracker.md +42 -0
  53. package/docs/plugins/fogclaw.md +95 -0
  54. package/docs/runbooks/address-review-findings.md +30 -0
  55. package/docs/runbooks/ci-failures.md +46 -0
  56. package/docs/runbooks/code-review.md +34 -0
  57. package/docs/runbooks/merge-change.md +28 -0
  58. package/docs/runbooks/pull-request.md +45 -0
  59. package/docs/runbooks/record-evidence.md +43 -0
  60. package/docs/runbooks/reproduce-bug.md +42 -0
  61. package/docs/runbooks/respond-to-feedback.md +42 -0
  62. package/docs/runbooks/review-findings.md +31 -0
  63. package/docs/runbooks/submit-openclaw-plugin.md +68 -0
  64. package/docs/runbooks/update-agents-md.md +59 -0
  65. package/docs/runbooks/update-domain-docs.md +42 -0
  66. package/docs/runbooks/validate-current-state.md +41 -0
  67. package/docs/runbooks/verify-release.md +69 -0
  68. package/docs/specs/2026-02-16-feat-openclaw-official-submission-spec.md +115 -0
  69. package/docs/specs/2026-02-17-feat-submit-fogclaw-to-openclaw.md +125 -0
  70. package/docs/specs/README.md +5 -0
  71. package/docs/specs/index.md +8 -0
  72. package/docs/spikes/README.md +8 -0
  73. package/fogclaw.config.example.json +15 -0
  74. package/openclaw.plugin.json +45 -0
  75. package/package.json +37 -0
  76. package/scripts/ci/he-docs-config.json +123 -0
  77. package/scripts/ci/he-docs-drift.sh +112 -0
  78. package/scripts/ci/he-docs-lint.sh +234 -0
  79. package/scripts/ci/he-plans-lint.sh +354 -0
  80. package/scripts/ci/he-runbooks-lint.sh +445 -0
  81. package/scripts/ci/he-specs-lint.sh +258 -0
  82. package/scripts/ci/he-spikes-lint.sh +249 -0
  83. package/scripts/runbooks/select-runbooks.sh +154 -0
  84. package/src/config.ts +46 -0
  85. package/src/engines/gliner.ts +88 -0
  86. package/src/engines/regex.ts +71 -0
  87. package/src/index.ts +223 -0
  88. package/src/redactor.ts +51 -0
  89. package/src/scanner.ts +90 -0
  90. package/src/types.ts +52 -0
  91. package/tests/config.test.ts +104 -0
  92. package/tests/gliner.test.ts +184 -0
  93. package/tests/plugin-smoke.test.ts +114 -0
  94. package/tests/redactor.test.ts +320 -0
  95. package/tests/regex.test.ts +345 -0
  96. package/tests/scanner.test.ts +199 -0
  97. package/tsconfig.json +20 -0
@@ -0,0 +1,249 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+
4
+ # ---------------------------------------------------------------------------
5
+ # he-spikes-lint.sh — Lint spike documents under docs/spikes
6
+ # ---------------------------------------------------------------------------
7
+
8
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
9
+ REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
10
+
11
+ DEFAULT_CONFIG_PATH="scripts/ci/he-docs-config.json"
12
+
13
+ # Default required headings (one per line for easy iteration)
14
+ DEFAULT_REQUIRED_HEADINGS=(
15
+ "## Context"
16
+ "## Validation Goal"
17
+ "## Approach"
18
+ "## Findings"
19
+ "## Decisions"
20
+ "## Recommendation"
21
+ "## Impact on Upstream Docs"
22
+ "## Spike Code"
23
+ "## Remaining Unknowns"
24
+ "## Time Spent"
25
+ "## Revision Notes"
26
+ )
27
+
28
+ # Counters
29
+ errors=0
30
+ warnings=0
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Helpers
34
+ # ---------------------------------------------------------------------------
35
+
36
+ gh_annotate() {
37
+ local level="$1" file="$2" title="$3" msg="$4"
38
+ if [[ -n "$file" ]]; then
39
+ echo "::${level} file=${file},title=${title}::${msg}"
40
+ else
41
+ echo "::${level} title=${title}::${msg}"
42
+ fi
43
+ }
44
+
45
+ emit() {
46
+ local level="$1" file="$2" title="$3" msg="$4"
47
+ gh_annotate "$level" "$file" "$title" "$msg"
48
+ local upper
49
+ upper="$(echo "$level" | tr '[:lower:]' '[:upper:]')"
50
+ echo "${upper}: ${msg}" >&2
51
+ if [[ "$level" == "error" ]]; then
52
+ (( errors++ )) || true
53
+ else
54
+ (( warnings++ )) || true
55
+ fi
56
+ }
57
+
58
+ # Extract YAML frontmatter (text between first two --- lines, exclusive).
59
+ # Prints frontmatter to stdout. Returns 1 if no valid frontmatter found.
60
+ extract_frontmatter() {
61
+ local file="$1"
62
+ local first_line
63
+ first_line="$(head -n1 "$file" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
64
+ if [[ "$first_line" != "---" ]]; then
65
+ return 1
66
+ fi
67
+ # Find the closing --- (skip line 1, start from line 2)
68
+ local line_num=0
69
+ local found=0
70
+ while IFS= read -r line; do
71
+ line_num=$((line_num + 1))
72
+ if [[ $line_num -eq 1 ]]; then
73
+ continue
74
+ fi
75
+ local trimmed
76
+ trimmed="$(echo "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
77
+ if [[ "$trimmed" == "---" ]]; then
78
+ found=1
79
+ break
80
+ fi
81
+ echo "$line"
82
+ done < "$file"
83
+ if [[ $found -eq 0 ]]; then
84
+ return 1
85
+ fi
86
+ return 0
87
+ }
88
+
89
+ # Extract keys from frontmatter text (stdin).
90
+ # Outputs one key per line.
91
+ frontmatter_keys() {
92
+ while IFS= read -r raw; do
93
+ local line
94
+ line="$(echo "$raw" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
95
+ # skip blank lines and comments
96
+ [[ -z "$line" ]] && continue
97
+ [[ "$line" == \#* ]] && continue
98
+ # must contain a colon
99
+ [[ "$line" != *:* ]] && continue
100
+ # extract key (everything before first colon), trimmed
101
+ local key
102
+ key="$(echo "$line" | cut -d: -f1 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
103
+ echo "$key"
104
+ done
105
+ }
106
+
107
+ # Check if a file contains an exact full line matching the needle.
108
+ has_exact_line() {
109
+ local file="$1" needle="$2"
110
+ grep -qFx "$needle" "$file"
111
+ }
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # Config loading
115
+ # ---------------------------------------------------------------------------
116
+
117
+ load_config() {
118
+ local config_rel="${HARNESS_DOCS_CONFIG:-$DEFAULT_CONFIG_PATH}"
119
+ local config_path="$REPO_ROOT/$config_rel"
120
+ if [[ ! -f "$config_path" ]]; then
121
+ echo "Error: he-spikes-lint missing/invalid config: Missing config '${config_rel}'. Fix: create it (bootstrap should do this) or set HARNESS_DOCS_CONFIG." >&2
122
+ exit 2
123
+ fi
124
+ # Validate it is a JSON object
125
+ if ! jq -e 'type == "object"' "$config_path" >/dev/null 2>&1; then
126
+ echo "Error: he-spikes-lint missing/invalid config: Config must be a JSON object." >&2
127
+ exit 2
128
+ fi
129
+ CONFIG_PATH="$config_path"
130
+ }
131
+
132
+ # Read a JSON array from config as newline-delimited strings.
133
+ config_string_array() {
134
+ local key="$1"
135
+ jq -r "(.${key} // []) | if type == \"array\" then .[] else empty end" "$CONFIG_PATH" 2>/dev/null | while IFS= read -r v; do
136
+ # only emit strings
137
+ echo "$v"
138
+ done
139
+ }
140
+
141
+ # ---------------------------------------------------------------------------
142
+ # Per-spike checks
143
+ # ---------------------------------------------------------------------------
144
+
145
+ check_placeholders() {
146
+ local rel="$1" file="$2" fail_ph="$3"
147
+ shift 3
148
+ local patterns=("$@")
149
+ for p in "${patterns[@]}"; do
150
+ [[ -z "$p" ]] && continue
151
+ if grep -qF "$p" "$file"; then
152
+ local msg="Spike '${rel}' contains placeholder token '${p}'."
153
+ if [[ "$fail_ph" == "1" ]]; then
154
+ emit "error" "$rel" "Placeholder token" "$msg"
155
+ else
156
+ emit "warning" "$rel" "Placeholder token" "${msg} (Set HARNESS_FAIL_ON_ARTIFACT_PLACEHOLDERS=1 to enforce.)"
157
+ fi
158
+ break
159
+ fi
160
+ done
161
+ }
162
+
163
+ check_spike() {
164
+ local file="$1"
165
+ local rel="${file#"$REPO_ROOT"/}"
166
+
167
+ # --- frontmatter ---
168
+ local fm
169
+ if ! fm="$(extract_frontmatter "$file")"; then
170
+ emit "error" "$rel" "Missing YAML frontmatter" \
171
+ "Spike '${rel}' must start with YAML frontmatter delimited by '---' lines."
172
+ return
173
+ fi
174
+
175
+ # Check required frontmatter keys
176
+ local fm_keys
177
+ fm_keys="$(echo "$fm" | frontmatter_keys)"
178
+
179
+ local required_keys
180
+ required_keys="$(config_string_array "required_spike_frontmatter_keys")"
181
+
182
+ if [[ -n "$required_keys" ]]; then
183
+ while IFS= read -r k; do
184
+ [[ -z "$k" ]] && continue
185
+ if ! echo "$fm_keys" | grep -qFx "$k"; then
186
+ emit "error" "$rel" "Missing frontmatter key" \
187
+ "Spike '${rel}' missing YAML frontmatter key '${k}:'."
188
+ fi
189
+ done <<< "$required_keys"
190
+ fi
191
+
192
+ # --- required headings ---
193
+ for h in "${DEFAULT_REQUIRED_HEADINGS[@]}"; do
194
+ if ! has_exact_line "$file" "$h"; then
195
+ emit "error" "$rel" "Missing heading" \
196
+ "Spike '${rel}' missing required heading line '${h}'."
197
+ fi
198
+ done
199
+
200
+ # --- placeholder tokens ---
201
+ local placeholder_patterns=()
202
+ while IFS= read -r p; do
203
+ [[ -z "$p" ]] && continue
204
+ placeholder_patterns+=("$p")
205
+ done < <(config_string_array "artifact_placeholder_patterns")
206
+
207
+ local fail_ph="${HARNESS_FAIL_ON_ARTIFACT_PLACEHOLDERS:-0}"
208
+ if [[ ${#placeholder_patterns[@]} -gt 0 ]]; then
209
+ check_placeholders "$rel" "$file" "$fail_ph" "${placeholder_patterns[@]}"
210
+ fi
211
+ }
212
+
213
+ # ---------------------------------------------------------------------------
214
+ # Main
215
+ # ---------------------------------------------------------------------------
216
+
217
+ load_config
218
+
219
+ echo "he-spikes-lint: starting"
220
+ echo "Repro: bash scripts/ci/he-spikes-lint.sh"
221
+
222
+ spikes_dir="$REPO_ROOT/docs/spikes"
223
+ if [[ ! -d "$spikes_dir" ]]; then
224
+ echo "he-spikes-lint: OK (docs/spikes not present)"
225
+ exit 0
226
+ fi
227
+
228
+ # Collect spike files sorted
229
+ spike_files=()
230
+ while IFS= read -r -d '' f; do
231
+ spike_files+=("$f")
232
+ done < <(find "$spikes_dir" -maxdepth 1 -name '*-spike.md' -print0 | sort -z)
233
+
234
+ if [[ ${#spike_files[@]} -eq 0 ]]; then
235
+ echo "he-spikes-lint: OK (no spike files)"
236
+ exit 0
237
+ fi
238
+
239
+ for f in "${spike_files[@]}"; do
240
+ check_spike "$f"
241
+ done
242
+
243
+ if [[ $errors -gt 0 ]]; then
244
+ echo "he-spikes-lint: FAIL (${errors} error(s), ${warnings} warning(s))" >&2
245
+ exit 1
246
+ fi
247
+
248
+ echo "he-spikes-lint: OK (${warnings} warning(s))"
249
+ exit 0
@@ -0,0 +1,154 @@
1
#!/bin/bash
set -euo pipefail

# Select runbooks whose called_from frontmatter matches a skill or step name.
# Prints matching runbook paths (relative to repo root) to stdout.

# --- Repo root: two parents up from this script's directory ---
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# --- CLI argument parsing ---
usage() {
  echo "Usage: $0 --skill <name> [--step <name>]" >&2
  exit 1
}

SKILL=""
STEP=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --skill)
      # Guard against a flag with no value: under `set -u` a bare "$2"
      # aborted with an unhelpful "unbound variable" error instead of usage.
      [[ $# -ge 2 ]] || usage
      SKILL="$2"
      shift 2
      ;;
    --step)
      [[ $# -ge 2 ]] || usage
      STEP="$2"
      shift 2
      ;;
    *)
      usage
      ;;
  esac
done

if [[ -z "$SKILL" ]]; then
  echo "Error: --skill is required" >&2
  exit 1
fi

# --- Main logic ---
RUNBOOKS_DIR="$REPO_ROOT/docs/runbooks"

if [[ ! -d "$RUNBOOKS_DIR" ]]; then
  exit 0
fi

# Extract the frontmatter block (between first --- and next ---).
# Parse called_from entries. Print the file path if skill or step matches.
process_file() {
  local file="$1"
  local in_frontmatter=0
  local in_called_from=0
  local first_line=1
  local called_from_items=()

  # `|| [[ -n "$line" ]]` keeps a final line without a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    local trimmed
    trimmed="$(echo "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"

    # Frontmatter must start at line 1 with ---
    if [[ "$first_line" -eq 1 ]]; then
      first_line=0
      if [[ "$trimmed" == "---" ]]; then
        in_frontmatter=1
        continue
      else
        # No frontmatter
        return
      fi
    fi

    # Inside frontmatter
    if [[ "$in_frontmatter" -eq 1 ]]; then
      # Closing delimiter
      if [[ "$trimmed" == "---" ]]; then
        break
      fi

      # Skip empty lines and comments. A blank line inside a YAML list block
      # does not terminate collection; scanning simply continues. (The old
      # code had a dead inner conditional here whose branches both continued.)
      if [[ -z "$trimmed" || "$trimmed" == \#* ]]; then
        continue
      fi

      # If we're collecting YAML list items for called_from
      if [[ "$in_called_from" -eq 1 ]]; then
        # Check if this is a list item (starts with -)
        if [[ "$trimmed" == -* ]]; then
          local item
          item="$(echo "$trimmed" | sed "s/^-[[:space:]]*//;s/^[\"']//;s/[\"']$//")"
          if [[ -n "$item" ]]; then
            called_from_items+=("$item")
          fi
          continue
        else
          # Not a list item; if it contains a colon it's a new key — stop collecting
          if echo "$trimmed" | grep -q ':'; then
            in_called_from=0
            # Fall through to process this line as a new key
          else
            continue
          fi
        fi
      fi

      # Check for key: value lines
      if echo "$trimmed" | grep -q ':'; then
        local key val
        key="$(echo "$trimmed" | sed 's/^\([^:]*\):.*/\1/' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
        val="$(echo "$trimmed" | sed 's/^[^:]*://' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"

        if [[ "$key" == "called_from" ]]; then
          # Inline list: called_from: [a, b]
          if [[ "$val" == \[* ]]; then
            local inner
            inner="$(echo "$val" | sed 's/^\[//;s/\]$//' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
            if [[ -n "$inner" ]]; then
              IFS=',' read -ra parts <<< "$inner"
              for part in "${parts[@]}"; do
                local cleaned
                # FIX: trim whitespace AND surrounding quotes. Previously
                # quoted inline entries (called_from: ["build"]) kept their
                # quotes and could never match a skill/step name, unlike the
                # block-list form which did strip them.
                cleaned="$(echo "$part" | sed "s/^[[:space:]]*//;s/[[:space:]]*$//;s/^[\"']//;s/[\"']$//")"
                if [[ -n "$cleaned" ]]; then
                  called_from_items+=("$cleaned")
                fi
              done
            fi
          else
            # YAML list form — start collecting on subsequent lines
            in_called_from=1
          fi
        fi
      fi
    fi
  done < "$file"

  # Check for matches (guarded expansion keeps old bash happy on empty arrays)
  for item in "${called_from_items[@]+"${called_from_items[@]}"}"; do
    if [[ "$item" == "$SKILL" ]]; then
      echo "${file#"$REPO_ROOT"/}"
      return
    fi
    if [[ -n "$STEP" && "$item" == "$STEP" ]]; then
      echo "${file#"$REPO_ROOT"/}"
      return
    fi
  done
}

# Find all .md files, sorted for deterministic output
while IFS= read -r mdfile; do
  process_file "$mdfile"
done < <(find "$RUNBOOKS_DIR" -name '*.md' -type f | sort)

exit 0
package/src/config.ts ADDED
@@ -0,0 +1,46 @@
1
+ import type { FogClawConfig, GuardrailAction, RedactStrategy } from "./types.js";
2
+
3
+ const VALID_GUARDRAIL_MODES: GuardrailAction[] = ["redact", "block", "warn"];
4
+ const VALID_REDACT_STRATEGIES: RedactStrategy[] = ["token", "mask", "hash"];
5
+
6
+ export const DEFAULT_CONFIG: FogClawConfig = {
7
+ enabled: true,
8
+ guardrail_mode: "redact",
9
+ redactStrategy: "token",
10
+ model: "onnx-community/gliner_large-v2.1",
11
+ confidence_threshold: 0.5,
12
+ custom_entities: [],
13
+ entityActions: {},
14
+ };
15
+
16
+ export function loadConfig(overrides: Partial<FogClawConfig>): FogClawConfig {
17
+ const config: FogClawConfig = { ...DEFAULT_CONFIG, ...overrides };
18
+
19
+ if (!VALID_GUARDRAIL_MODES.includes(config.guardrail_mode)) {
20
+ throw new Error(
21
+ `Invalid guardrail_mode "${config.guardrail_mode}". Must be one of: ${VALID_GUARDRAIL_MODES.join(", ")}`,
22
+ );
23
+ }
24
+
25
+ if (!VALID_REDACT_STRATEGIES.includes(config.redactStrategy)) {
26
+ throw new Error(
27
+ `Invalid redactStrategy "${config.redactStrategy}". Must be one of: ${VALID_REDACT_STRATEGIES.join(", ")}`,
28
+ );
29
+ }
30
+
31
+ if (config.confidence_threshold < 0 || config.confidence_threshold > 1) {
32
+ throw new Error(
33
+ `confidence_threshold must be between 0 and 1, got ${config.confidence_threshold}`,
34
+ );
35
+ }
36
+
37
+ for (const [entityType, action] of Object.entries(config.entityActions)) {
38
+ if (!VALID_GUARDRAIL_MODES.includes(action)) {
39
+ throw new Error(
40
+ `Invalid action "${action}" for entity type "${entityType}". Must be one of: ${VALID_GUARDRAIL_MODES.join(", ")}`,
41
+ );
42
+ }
43
+ }
44
+
45
+ return config;
46
+ }
@@ -0,0 +1,88 @@
1
+ import type { Entity } from "../types.js";
2
+ import { canonicalType } from "../types.js";
3
+
4
// Entity labels always sent to GLiNER, independent of user configuration.
// NOTE(review): structured identifiers (email, phone, SSN, …) are absent here —
// presumably handled by the regex engine instead; confirm against scanner.ts.
const DEFAULT_NER_LABELS = [
  "person",
  "organization",
  "location",
  "address",
  "date of birth",
  "medical record number",
  "account number",
  "passport number",
];

/**
 * NER engine backed by the dynamically-imported `gliner` package running a
 * GLiNER ONNX model on CPU. Callers must await initialize() before scan().
 */
export class GlinerEngine {
  // Underlying Gliner model instance; `any` because the dependency is loaded
  // dynamically at runtime and may not be installed at type-check time.
  private model: any = null;
  // Model path/identifier; also reused as the tokenizer path below.
  private modelPath: string;
  // Minimum confidence score forwarded to inference().
  private threshold: number;
  // Extra labels configured at runtime via setCustomLabels().
  private customLabels: string[] = [];
  // Set to true only after initialize() completes successfully.
  private initialized = false;

  /**
   * @param modelPath Path/identifier of the GLiNER model (also used as the
   *   tokenizer path).
   * @param threshold Confidence threshold passed to inference (default 0.5).
   */
  constructor(modelPath: string, threshold: number = 0.5) {
    this.modelPath = modelPath;
    this.threshold = threshold;
  }

  /**
   * Dynamically import the `gliner` package and initialize the model.
   * Idempotent: returns immediately once initialization has succeeded.
   *
   * @throws Error wrapping the underlying failure if the import or model
   *   initialization fails (e.g. the optional dependency is not installed).
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    try {
      const { Gliner } = await import("gliner");
      this.model = new Gliner({
        tokenizerPath: this.modelPath,
        onnxSettings: {
          modelPath: this.modelPath,
          executionProvider: "cpu",
        },
        // NOTE(review): maxWidth presumably caps entity span width in tokens —
        // confirm against the gliner package docs.
        maxWidth: 12,
        modelType: "gliner",
      });
      await this.model.initialize();
      this.initialized = true;
    } catch (err) {
      throw new Error(
        `Failed to initialize GLiNER model "${this.modelPath}": ${err instanceof Error ? err.message : String(err)}`,
      );
    }
  }

  /** Replace the custom labels merged into every subsequent scan(). */
  setCustomLabels(labels: string[]): void {
    this.customLabels = labels;
  }

  /**
   * Run NER over `text` and return detected entities.
   *
   * @param text Input text; an empty string short-circuits to [].
   * @param extraLabels Optional one-off labels merged (deduplicated) with the
   *   defaults and any configured custom labels.
   * @returns Entities with labels normalized via canonicalType and
   *   source "gliner".
   * @throws Error if initialize() has not completed successfully.
   */
  async scan(text: string, extraLabels?: string[]): Promise<Entity[]> {
    if (!text) return [];
    if (!this.model) {
      throw new Error("GLiNER engine not initialized. Call initialize() first.");
    }

    const labels = [
      ...DEFAULT_NER_LABELS,
      ...this.customLabels,
      ...(extraLabels ?? []),
    ];

    // Deduplicate labels
    const uniqueLabels = [...new Set(labels)];

    const results = await this.model.inference(text, uniqueLabels, {
      threshold: this.threshold,
    });

    // Map raw gliner results onto the shared Entity shape.
    return results.map(
      (r: { text: string; label: string; score: number; start: number; end: number }) => ({
        text: r.text,
        label: canonicalType(r.label),
        start: r.start,
        end: r.end,
        confidence: r.score,
        source: "gliner" as const,
      }),
    );
  }

  /** Whether initialize() has completed successfully. */
  get isInitialized(): boolean {
    return this.initialized;
  }
}
@@ -0,0 +1,71 @@
1
+ import type { Entity } from "../types.js";
2
+
3
+ interface PatternDef {
4
+ label: string;
5
+ pattern: RegExp;
6
+ }
7
+
8
+ const PATTERNS: PatternDef[] = [
9
+ {
10
+ label: "EMAIL",
11
+ pattern:
12
+ /(?<![A-Za-z0-9._%+\-@])(?![A-Za-z_]{2,20}=)[A-Za-z0-9!#$%&*+\-/=^_`{|}~][A-Za-z0-9!#$%&'*+\-/=?^_`{|}~.]*@(?:\.?[A-Za-z0-9-]+\.)+[A-Za-z]{2,}(?=$|[^A-Za-z])/gi,
13
+ },
14
+ {
15
+ label: "PHONE",
16
+ pattern:
17
+ /(?<![A-Za-z0-9])(?:(?:(?:\+?1)[-.\s]?)?(?:\(\d{3}\)|\d{3})[-.\s]?\d{3}[-.\s]?\d{4}|\+\d{1,3}[\s\-.]?\d{1,4}(?:[\s\-.]?\d{2,4}){2,3})(?![-A-Za-z0-9])/gi,
18
+ },
19
+ {
20
+ label: "SSN",
21
+ pattern:
22
+ /(?<!\d)(?:(?!000|666)\d{3}-(?!00)\d{2}-(?!0000)\d{4}|(?!000|666)\d{3}(?!00)\d{2}(?!0000)\d{4})(?!\d)/g,
23
+ },
24
+ {
25
+ label: "CREDIT_CARD",
26
+ pattern:
27
+ /\b(?:4\d{12}(?:\d{3})?|5[1-5]\d{14}|3[47]\d{13}|(?:(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2})[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})|(?:3[47]\d{2}[-\s]?\d{6}[-\s]?\d{5}))\b/g,
28
+ },
29
+ {
30
+ label: "IP_ADDRESS",
31
+ pattern:
32
+ /\b(?:(?:25[0-5]|2[0-4]\d|1?\d?\d)\.(?:25[0-5]|2[0-4]\d|1?\d?\d)\.(?:25[0-5]|2[0-4]\d|1?\d?\d)\.(?:25[0-5]|2[0-4]\d|1?\d?\d))\b/g,
33
+ },
34
+ {
35
+ label: "DATE",
36
+ pattern:
37
+ /\b(?:(?:0?[1-9]|1[0-2])[/-](?:0?[1-9]|[12]\d|3[01])[/-](?:\d{2}|\d{4})|(?:\d{4})-(?:0?[1-9]|1[0-2])-(?:0?[1-9]|[12]\d|3[01])|(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+(?:0?[1-9]|[12]\d|3[01]),\s+(?:19|20)\d{2})\b/gi,
38
+ },
39
+ {
40
+ label: "ZIP_CODE",
41
+ pattern: /\b\d{5}(?:-\d{4})?\b/g,
42
+ },
43
+ ];
44
+
45
+ export class RegexEngine {
46
+ scan(text: string): Entity[] {
47
+ const entities: Entity[] = [];
48
+
49
+ for (const { label, pattern } of PATTERNS) {
50
+ // Reset lastIndex to avoid stale state from previous calls
51
+ pattern.lastIndex = 0;
52
+
53
+ let match: RegExpExecArray | null;
54
+ while ((match = pattern.exec(text)) !== null) {
55
+ entities.push({
56
+ text: match[0],
57
+ label,
58
+ start: match.index,
59
+ end: match.index + match[0].length,
60
+ confidence: 1.0,
61
+ source: "regex",
62
+ });
63
+ }
64
+ }
65
+
66
+ // Sort by start position for deterministic output
67
+ entities.sort((a, b) => a.start - b.start || a.end - b.end);
68
+
69
+ return entities;
70
+ }
71
+ }