@modelstatus/cli 0.1.25 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@modelstatus/cli",
3
- "version": "0.1.25",
3
+ "version": "0.1.26",
4
4
  "description": "Track which AI models you use, where, and never get surprised by a retirement. Free offline model-health for any repo (mm status), browser sign-in for cloud inventory + alerts.",
5
5
  "keywords": [
6
6
  "llm",
@@ -3,9 +3,11 @@
3
3
  * returns the model strings found per line. No I/O. */
4
4
 
5
5
  // File extensions / TLDs the family globs accidentally swallow
6
- // (e.g. "command-2.0.0.tgz", "grok-free.app"). Used to reject generic matches.
6
+ // (e.g. "command-2.0.0.tgz", "grok-free.app", "llama-3.gguf"). Used to reject
7
+ // generic matches. Includes model-WEIGHT/data/media extensions so a weight-file
8
+ // reference (llama-3.safetensors) isn't mistaken for a model usage.
7
9
  const BANNED_TAIL =
8
- /\.(tgz|tar|gz|zip|js|ts|tsx|jsx|mjs|py|go|rb|json|md|lock|sh|css|html|txt|log|yaml|yml|toml|ini|conf|cfg|env|pem|crt|key|csv|xml|pdf|sql|app|com|net|io|dev|org|ai|co)\b/;
10
+ /\.(tgz|tar|gz|zip|js|ts|tsx|jsx|mjs|py|go|rb|json|md|lock|sh|css|html|txt|log|yaml|yml|toml|ini|conf|cfg|env|pem|crt|key|csv|xml|pdf|sql|gguf|safetensors|bin|onnx|pt|pth|ckpt|h5|npz|parquet|arrow|jpeg|jpg|png|gif|webp|bmp|svg|mp4|wav|app|com|net|io|dev|org|ai|co)\b/;
9
11
 
10
12
  /** Trim leading/trailing separators a greedy family glob can capture. */
11
13
  function cleanGeneric(s) {
@@ -19,17 +21,31 @@ function isTokenChar(ch) {
19
21
  return /[A-Za-z0-9._/:-]/.test(ch);
20
22
  }
21
23
 
22
- /** True when `term` occurs in `haystack` at a word-ish boundary (not embedded
23
- * inside a longer identifier). Both are already lower-cased. This is what stops
24
- * the alias "gpt-4" from matching inside "gpt-4o-mini" (→ wrong, older model). */
24
+ // Provider prefixes legitimately precede an id ("anthropic.claude-…", "ft:gpt-…",
25
+ // "us.anthropic.…", "openrouter/…"), so '.' ':' '/' on the LEFT is still a boundary.
26
+ function isPrefixSep(ch) {
27
+ return ch === "." || ch === ":" || ch === "/";
28
+ }
29
+ // Known model-id SUFFIXES seen in real configs: Bedrock ':0'/'-v1', dated
30
+ // '-20250514' snapshots, '@version'. The remainder starting with one is still a
31
+ // boundary — so "claude-opus-4-20250514" resolves inside a Bedrock ARN, while
32
+ // "gpt-4" still does NOT match inside "gpt-4o". Kept identical to scan-pr.ts.
33
+ const MODEL_SUFFIX = /^(:|-v[0-9]|-[0-9]{6,}|@)/;
34
+
35
+ /** True when `term` occurs in `haystack` at a model-id boundary — tolerating
36
+ * provider prefixes + known version/region/snapshot suffixes, but NOT a plain
37
+ * embedded match. Both are already lower-cased. */
25
38
  function matchesAtBoundary(haystack, term) {
26
39
  let from = 0;
27
40
  for (;;) {
28
41
  const at = haystack.indexOf(term, from);
29
42
  if (at < 0) return false;
30
43
  const before = at > 0 ? haystack[at - 1] : "";
31
- const after = at + term.length < haystack.length ? haystack[at + term.length] : "";
32
- if ((before === "" || !isTokenChar(before)) && (after === "" || !isTokenChar(after))) return true;
44
+ const rest = haystack.slice(at + term.length);
45
+ const after = rest[0] ?? "";
46
+ const boundedLeft = before === "" || !isTokenChar(before) || isPrefixSep(before);
47
+ const boundedRight = after === "" || !isTokenChar(after) || MODEL_SUFFIX.test(rest);
48
+ if (boundedLeft && boundedRight) return true;
33
49
  from = at + 1; // a later occurrence may be bounded
34
50
  }
35
51
  }
@@ -45,7 +61,10 @@ function looksLikeModel(s) {
45
61
  export function compilePatterns(patterns) {
46
62
  const exact = [];
47
63
  for (const ms of patterns.model_strings || []) {
48
- if (ms.match && ms.match.length >= 4) exact.push(ms.match.toLowerCase());
64
+ // Registry strings are curated, and the boundary matcher prevents embedded
65
+ // matches — so a low floor is safe and lets short real ids (o1, o3) resolve.
66
+ // (The old >=4 floor silently dropped the entire OpenAI o-series.)
67
+ if (ms.match && ms.match.length >= 2) exact.push(ms.match.toLowerCase());
49
68
  }
50
69
  const generic = (patterns.generic_model_regexes || []).map((r) => new RegExp(r, "gi"));
51
70
  return { exact, generic };
package/src/index.js CHANGED
@@ -4,6 +4,7 @@ import path from "node:path";
4
4
  import { resolveAuth, loadConfig, saveConfig, clearAuth, configFilePath } from "./config.js";
5
5
  import { createClient } from "./api.js";
6
6
  import { collectFrom, availability, ALL_SOURCE_IDS } from "./sources/index.js";
7
+ import { redactValue } from "./redact.js";
7
8
  import { loginViaBrowser } from "./auth.js";
8
9
  import { maybeCheckForUpdate } from "./updater.js";
9
10
  import { track, maybeAnalyticsNotice } from "./telemetry.js";
@@ -170,7 +171,9 @@ async function cmdScan(positional, flags) {
170
171
 
171
172
  const usages = rows.map((r) => ({
172
173
  model_id: r.model_id ?? undefined,
173
- custom_model_name: r.model_id ? undefined : r.model_string,
174
+ // Redact + bound the custom id: a generic-glob hit on an .env line can over-
175
+ // capture a secret-ish fragment, and only the snippet was being redacted.
176
+ custom_model_name: r.model_id ? undefined : redactValue(r.model_string).slice(0, 120),
174
177
  environment: r.environment,
175
178
  location_label: r.location_label,
176
179
  source_repo: ghRepoSlug() || undefined,
@@ -241,7 +244,7 @@ async function ciReport(dir, flags, res) {
241
244
  seen.add(k);
242
245
  usages.push({
243
246
  model_id: r?.model_id ?? undefined,
244
- custom_model_name: r?.model_id ? undefined : c.model_string,
247
+ custom_model_name: r?.model_id ? undefined : redactValue(c.model_string).slice(0, 120),
245
248
  environment: c.environment,
246
249
  location_label: c.location_label,
247
250
  source_path: c.source_path,
Binary file