@modelstatus/cli 0.1.25 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/detect/core.js +27 -8
- package/src/index.js +5 -2
- package/src/sources/filesystem.js +0 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@modelstatus/cli",
|
|
3
|
-
"version": "0.1.25",
|
|
3
|
+
"version": "0.1.26",
|
|
4
4
|
"description": "Track which AI models you use, where, and never get surprised by a retirement. Free offline model-health for any repo (mm status), browser sign-in for cloud inventory + alerts.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"llm",
|
package/src/detect/core.js
CHANGED
|
@@ -3,9 +3,11 @@
|
|
|
3
3
|
* returns the model strings found per line. No I/O. */
|
|
4
4
|
|
|
5
5
|
// File extensions / TLDs the family globs accidentally swallow
|
|
6
|
-
// (e.g. "command-2.0.0.tgz", "grok-free.app"). Used to reject
|
|
6
|
+
// (e.g. "command-2.0.0.tgz", "grok-free.app", "llama-3.gguf"). Used to reject
|
|
7
|
+
// generic matches. Includes model-WEIGHT/data/media extensions so a weight-file
|
|
8
|
+
// reference (llama-3.safetensors) isn't mistaken for a model usage.
|
|
7
9
|
const BANNED_TAIL =
|
|
8
|
-
/\.(tgz|tar|gz|zip|js|ts|tsx|jsx|mjs|py|go|rb|json|md|lock|sh|css|html|txt|log|yaml|yml|toml|ini|conf|cfg|env|pem|crt|key|csv|xml|pdf|sql|app|com|net|io|dev|org|ai|co)\b/;
|
|
10
|
+
/\.(tgz|tar|gz|zip|js|ts|tsx|jsx|mjs|py|go|rb|json|md|lock|sh|css|html|txt|log|yaml|yml|toml|ini|conf|cfg|env|pem|crt|key|csv|xml|pdf|sql|gguf|safetensors|bin|onnx|pt|pth|ckpt|h5|npz|parquet|arrow|jpeg|jpg|png|gif|webp|bmp|svg|mp4|wav|app|com|net|io|dev|org|ai|co)\b/;
|
|
9
11
|
|
|
10
12
|
/** Trim leading/trailing separators a greedy family glob can capture. */
|
|
11
13
|
function cleanGeneric(s) {
|
|
@@ -19,17 +21,31 @@ function isTokenChar(ch) {
|
|
|
19
21
|
return /[A-Za-z0-9._/:-]/.test(ch);
|
|
20
22
|
}
|
|
21
23
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
// Provider prefixes legitimately precede an id ("anthropic.claude-…", "ft:gpt-…",
|
|
25
|
+
// "us.anthropic.…", "openrouter/…"), so '.' ':' '/' on the LEFT is still a boundary.
|
|
26
|
+
function isPrefixSep(ch) {
|
|
27
|
+
return ch === "." || ch === ":" || ch === "/";
|
|
28
|
+
}
|
|
29
|
+
// Known model-id SUFFIXES seen in real configs: Bedrock ':0'/'-v1', dated
|
|
30
|
+
// '-20250514' snapshots, '@version'. The remainder starting with one is still a
|
|
31
|
+
// boundary — so "claude-opus-4-20250514" resolves inside a Bedrock ARN, while
|
|
32
|
+
// "gpt-4" still does NOT match inside "gpt-4o". Kept identical to scan-pr.ts.
|
|
33
|
+
const MODEL_SUFFIX = /^(:|-v[0-9]|-[0-9]{6,}|@)/;
|
|
34
|
+
|
|
35
|
+
/** True when `term` occurs in `haystack` at a model-id boundary — tolerating
|
|
36
|
+
* provider prefixes + known version/region/snapshot suffixes, but NOT a plain
|
|
37
|
+
* embedded match. Both are already lower-cased. */
|
|
25
38
|
function matchesAtBoundary(haystack, term) {
|
|
26
39
|
let from = 0;
|
|
27
40
|
for (;;) {
|
|
28
41
|
const at = haystack.indexOf(term, from);
|
|
29
42
|
if (at < 0) return false;
|
|
30
43
|
const before = at > 0 ? haystack[at - 1] : "";
|
|
31
|
-
const
|
|
32
|
-
|
|
44
|
+
const rest = haystack.slice(at + term.length);
|
|
45
|
+
const after = rest[0] ?? "";
|
|
46
|
+
const boundedLeft = before === "" || !isTokenChar(before) || isPrefixSep(before);
|
|
47
|
+
const boundedRight = after === "" || !isTokenChar(after) || MODEL_SUFFIX.test(rest);
|
|
48
|
+
if (boundedLeft && boundedRight) return true;
|
|
33
49
|
from = at + 1; // a later occurrence may be bounded
|
|
34
50
|
}
|
|
35
51
|
}
|
|
@@ -45,7 +61,10 @@ function looksLikeModel(s) {
|
|
|
45
61
|
export function compilePatterns(patterns) {
|
|
46
62
|
const exact = [];
|
|
47
63
|
for (const ms of patterns.model_strings || []) {
|
|
48
|
-
|
|
64
|
+
// Registry strings are curated, and the boundary matcher prevents embedded
|
|
65
|
+
// matches — so a low floor is safe and lets short real ids (o1, o3) resolve.
|
|
66
|
+
// (The old >=4 floor silently dropped the entire OpenAI o-series.)
|
|
67
|
+
if (ms.match && ms.match.length >= 2) exact.push(ms.match.toLowerCase());
|
|
49
68
|
}
|
|
50
69
|
const generic = (patterns.generic_model_regexes || []).map((r) => new RegExp(r, "gi"));
|
|
51
70
|
return { exact, generic };
|
package/src/index.js
CHANGED
|
@@ -4,6 +4,7 @@ import path from "node:path";
|
|
|
4
4
|
import { resolveAuth, loadConfig, saveConfig, clearAuth, configFilePath } from "./config.js";
|
|
5
5
|
import { createClient } from "./api.js";
|
|
6
6
|
import { collectFrom, availability, ALL_SOURCE_IDS } from "./sources/index.js";
|
|
7
|
+
import { redactValue } from "./redact.js";
|
|
7
8
|
import { loginViaBrowser } from "./auth.js";
|
|
8
9
|
import { maybeCheckForUpdate } from "./updater.js";
|
|
9
10
|
import { track, maybeAnalyticsNotice } from "./telemetry.js";
|
|
@@ -170,7 +171,9 @@ async function cmdScan(positional, flags) {
|
|
|
170
171
|
|
|
171
172
|
const usages = rows.map((r) => ({
|
|
172
173
|
model_id: r.model_id ?? undefined,
|
|
173
|
-
|
|
174
|
+
// Redact + bound the custom id: a generic-glob hit on an .env line can over-
|
|
175
|
+
// capture a secret-ish fragment, and only the snippet was being redacted.
|
|
176
|
+
custom_model_name: r.model_id ? undefined : redactValue(r.model_string).slice(0, 120),
|
|
174
177
|
environment: r.environment,
|
|
175
178
|
location_label: r.location_label,
|
|
176
179
|
source_repo: ghRepoSlug() || undefined,
|
|
@@ -241,7 +244,7 @@ async function ciReport(dir, flags, res) {
|
|
|
241
244
|
seen.add(k);
|
|
242
245
|
usages.push({
|
|
243
246
|
model_id: r?.model_id ?? undefined,
|
|
244
|
-
custom_model_name: r?.model_id ? undefined : c.model_string,
|
|
247
|
+
custom_model_name: r?.model_id ? undefined : redactValue(c.model_string).slice(0, 120),
|
|
245
248
|
environment: c.environment,
|
|
246
249
|
location_label: c.location_label,
|
|
247
250
|
source_path: c.source_path,
|
|
Binary file
|