@hiveai/core 0.11.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +50 -1
- package/dist/index.js +90 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -718,6 +718,55 @@ declare function isGlobPath(p: string): boolean;
|
|
|
718
718
|
declare function globToRegExp(pattern: string): RegExp;
|
|
719
719
|
declare function relPathFrom(root: string, abs: string): string;
|
|
720
720
|
|
|
721
|
+
/**
|
|
722
|
+
* Distinctive-token corroboration for the anti-pattern gate.
|
|
723
|
+
*
|
|
724
|
+
* The pre-commit gate used to hard-block whenever a diff shared ANY ≥4-char token
|
|
725
|
+
* with an anchored gotcha's body. That fires on ubiquitous domain words ("memory",
|
|
726
|
+
* "sensor", "scope", "input", "version") and on version-bump diffs — blocking agents
|
|
727
|
+
* for nothing. The fix: a `literal` overlap only corroborates a BLOCK when at least
|
|
728
|
+
* one shared token is *distinctive* to that gotcha — i.e. rare across the gotcha
|
|
729
|
+
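* corpus (low document frequency), like `BigInt`. Note that compound names such as `open-in-view` or `rec_7` are split on non-alphanumeric characters before matching, so only their parts of at least 4 characters (`open`, `view`) can take part. Common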
|
|
730
|
+
* words can still surface the warning for review; they just can't hard-block.
|
|
731
|
+
*
|
|
732
|
+
* Pure module (no I/O), TF-IDF-style. Unit-tested in `test/distinctive.test.ts`.
|
|
733
|
+
*/
|
|
734
|
+
/**
|
|
735
|
+
* Language keywords + ubiquitous code words that would match almost any memory body
|
|
736
|
+
* and so carry no distinguishing signal. Shared by the diff tokenizer and the
|
|
737
|
+
* distinctiveness check so "literal" stays meaningful.
|
|
738
|
+
*/
|
|
739
|
+
declare const CODE_STOPWORDS: Set<string>;
|
|
740
|
+
/** Minimum token length kept for word-level matching (shorter tokens are too noisy). */
|
|
741
|
+
declare const MIN_WORD_LEN = 4;
|
|
742
|
+
/** Split text into lowercase word tokens (>= MIN_WORD_LEN, excluding code stopwords). */
|
|
743
|
+
declare function tokenizeWords(text: string): string[];
|
|
744
|
+
interface DocFrequency {
|
|
745
|
+
/** token -> number of documents (memory bodies) it appears in */
|
|
746
|
+
df: Map<string, number>;
|
|
747
|
+
/** total number of documents */
|
|
748
|
+
total: number;
|
|
749
|
+
}
|
|
750
|
+
/** Build per-token document frequency across a corpus of memory bodies. */
|
|
751
|
+
declare function buildDocFrequency(bodies: string[]): DocFrequency;
|
|
752
|
+
/**
|
|
753
|
+
* Document-frequency cap at/below which a token counts as distinctive. Deliberately
|
|
754
|
+
* strict — "distinctive" means *rare* (≈ the bottom 10% of the corpus), with a floor
|
|
755
|
+
* of 1 so a token appearing in a single memory is always distinctive. Strictness is
|
|
756
|
+
* intentional: blocking is the aggressive action, so we under-block rather than fire
|
|
757
|
+
* on a word that several gotchas happen to share.
|
|
758
|
+
*/
|
|
759
|
+
declare function distinctiveCap(total: number): number;
|
|
760
|
+
/** True when `token` is distinctive (rare) within the corpus. */
|
|
761
|
+
declare function isDistinctiveToken(token: string, freq: DocFrequency): boolean;
|
|
762
|
+
/**
|
|
763
|
+
* True when the added diff text shares at least one *distinctive* word token with the
|
|
764
|
+
* memory body. This is the precise corroboration the block decision should require:
|
|
765
|
+
* "the change actually contains the specific thing this gotcha warns about", not
|
|
766
|
+
* "the change happens to mention a common domain word".
|
|
767
|
+
*/
|
|
768
|
+
declare function diffHasDistinctiveOverlap(addedDiffText: string, memoryBody: string, freq: DocFrequency): boolean;
|
|
769
|
+
|
|
721
770
|
/**
|
|
722
771
|
* Progressive disclosure for `skill` memories.
|
|
723
772
|
*
|
|
@@ -1417,4 +1466,4 @@ interface SensorSuggestionOptions {
|
|
|
1417
1466
|
*/
|
|
1418
1467
|
declare function suggestSensorFromMemory(body: string, anchorPaths: string[], options?: SensorSuggestionOptions): Sensor | null;
|
|
1419
1468
|
|
|
1420
|
-
export { AUTOPILOT_DEFAULTS, type Activation, type ActivationContext, ActivationSchema, type Anchor, AnchorSchema, type AntiPatternGate, type AutoPromoteRule, BRIEFING_MARKER_TTL_MS, BRIEFING_PRESET_DEFAULTS, type BreakingChange, type BriefingBudgetNumbers, type BriefingBudgetPreset, type BriefingMarker, type BudgetPart, type BudgetSlice, type BuildCodeMapOptions, CHARS_PER_TOKEN, CODE_MAP_FILE, CONFIG_FILE, type CodeExport, type CodeExportKind, type CodeFileEntry, type CodeMap, type CodeMapQueryOptions, type CollectTimelineOpts, type ConfidenceLevel, type ConfidenceThresholds, type ConflictCandidatePair, type ConflictCandidatesOpts, type ContractDiffResult, type ContractFile, type ContractSnapshot, CrossRepoProvenanceSchema, type CrossRepoReport, type CrossRepoSource, DECAY_DAYS, DEFAULT_AUTO_PROMOTE_RULE, DEFAULT_CONFIDENCE_THRESHOLDS, DEFAULT_CONFIG, DEFAULT_DORMANT_DAYS, type DepChange, type DepTrackResult, type DependencySnapshot, type EvalReport, type EvalSpec, GUESSABLE_THRESHOLD, HAIVE_DIR, type HaiveConfig, type HaivePaths, type ImpactOptions, type ImpactScore, type ImpactSummary, type ImpactTier, type LexicalRankResult, type LoadedMemory, MEMORIES_DIR, type Memory, type MemoryFrontmatter, MemoryFrontmatterSchema, type MemoryScope, MemoryScopeSchema, type MemoryStatus, MemoryStatusSchema, type MemoryType, MemoryTypeSchema, type MemoryUsage, PROJECT_CONTEXT_FILE, RUNTIME_JOURNAL_FILENAME, type ResolveProjectInfo, type RetirementSignal, type RetrievalAggregate, type RetrievalCase, type RetrievalCaseResult, type RuntimeJournalEntry, SESSION_RECAP_TTL_MS, STACK_PACK_TAG, type SelfEvalOptions, type Sensor, type SensorAggregate, type SensorCase, type SensorCaseResult, type SensorHit, SensorSchema, type SensorSuggestionOptions, type SensorTarget, type SkillActivation, type TimelineEntry, type TopicStatusPair, type TruncateOptions, type TruncateResult, USAGE_FILE, USAGE_LOG_DIR, USAGE_LOG_FILE, type UsageAggregate, type UsageEvent, type UsageIndex, type 
VerifyOptions, type VerifyResult, addedLinesFromDiff, aggregateRetrieval, aggregateSensors, aggregateUsage, allocateBudget, antiPatternGateParams, appendRuntimeJournalEntry, appendUsageEvent, briefingMarkerPath, briefingMarkersDir, buildCodeMap, buildFrontmatter, buildReport, bumpRead, codeMapPath, collectTimelineEntries, compareImpact, compileRegexSensor, computeImpact, configPath, contractLockPath, deriveConfidence, diffContract, emptyUsage, emptyUsageIndex, enforcementDir, estimateTokens, evaluateSkillActivation, extractActionsBriefBody, extractSnippet, findLexicalConflictPairs, findProjectRoot, findTopicStatusConflictPairs, firstMemoryOneLine, getUsage, globToRegExp, hasRecentBriefingMarker, inferModulesFromPaths, isAutoPromoteEligible, isDecaying, isFreshIsoDate, isGlobPath, isLikelyGuessable, isRetiredMemory, isSkill, isSkillSuppressed, isStackPackSeed, listMarkdownFilesRecursive, literalMatchesAllTokens, literalMatchesAnyToken, loadCodeMap, loadConfig, loadConfigSync, loadMemoriesFromDir, loadMemory, loadUsageIndex, memoryFilePath, memoryMatchesAnchorPaths, newMemoryId, normalizeSessionId, overallScore, parseMemory, parseSince, pathsOverlap, pickSnippetNeedle, pullCrossRepoSources, queryCodeMap, rankMemoriesLexical, readRecentBriefingMarker, readRuntimeJournalTail, readUsageEvents, recordApplied, recordRejection, relPathFrom, resolveBriefingBudget, resolveHaivePaths, resolveManifestFiles, resolveProjectInfo, retirementSignal, runRegexSensor, runSensors, runtimeJournalPath, saveCodeMap, saveConfig, saveUsageIndex, scoreRetrievalCase, scoreSensorCase, sensorAppliesToPath, sensorTargetsFromDiff, serializeMemory, snapshotContract, specificityScore, stripPrivate, suggestSensorFromMemory, suggestTopicKey, summarizeImpact, synthesizeSelfEvalCases, titleFromBody, tokenizeQuery, trackDependencies, trackReads, truncateToTokens, usageLogPath, usageLogSize, usagePath, verifyAnchor, watchContracts, writeBriefingMarker };
|
|
1469
|
+
export { AUTOPILOT_DEFAULTS, type Activation, type ActivationContext, ActivationSchema, type Anchor, AnchorSchema, type AntiPatternGate, type AutoPromoteRule, BRIEFING_MARKER_TTL_MS, BRIEFING_PRESET_DEFAULTS, type BreakingChange, type BriefingBudgetNumbers, type BriefingBudgetPreset, type BriefingMarker, type BudgetPart, type BudgetSlice, type BuildCodeMapOptions, CHARS_PER_TOKEN, CODE_MAP_FILE, CODE_STOPWORDS, CONFIG_FILE, type CodeExport, type CodeExportKind, type CodeFileEntry, type CodeMap, type CodeMapQueryOptions, type CollectTimelineOpts, type ConfidenceLevel, type ConfidenceThresholds, type ConflictCandidatePair, type ConflictCandidatesOpts, type ContractDiffResult, type ContractFile, type ContractSnapshot, CrossRepoProvenanceSchema, type CrossRepoReport, type CrossRepoSource, DECAY_DAYS, DEFAULT_AUTO_PROMOTE_RULE, DEFAULT_CONFIDENCE_THRESHOLDS, DEFAULT_CONFIG, DEFAULT_DORMANT_DAYS, type DepChange, type DepTrackResult, type DependencySnapshot, type DocFrequency, type EvalReport, type EvalSpec, GUESSABLE_THRESHOLD, HAIVE_DIR, type HaiveConfig, type HaivePaths, type ImpactOptions, type ImpactScore, type ImpactSummary, type ImpactTier, type LexicalRankResult, type LoadedMemory, MEMORIES_DIR, MIN_WORD_LEN, type Memory, type MemoryFrontmatter, MemoryFrontmatterSchema, type MemoryScope, MemoryScopeSchema, type MemoryStatus, MemoryStatusSchema, type MemoryType, MemoryTypeSchema, type MemoryUsage, PROJECT_CONTEXT_FILE, RUNTIME_JOURNAL_FILENAME, type ResolveProjectInfo, type RetirementSignal, type RetrievalAggregate, type RetrievalCase, type RetrievalCaseResult, type RuntimeJournalEntry, SESSION_RECAP_TTL_MS, STACK_PACK_TAG, type SelfEvalOptions, type Sensor, type SensorAggregate, type SensorCase, type SensorCaseResult, type SensorHit, SensorSchema, type SensorSuggestionOptions, type SensorTarget, type SkillActivation, type TimelineEntry, type TopicStatusPair, type TruncateOptions, type TruncateResult, USAGE_FILE, USAGE_LOG_DIR, USAGE_LOG_FILE, type UsageAggregate, 
type UsageEvent, type UsageIndex, type VerifyOptions, type VerifyResult, addedLinesFromDiff, aggregateRetrieval, aggregateSensors, aggregateUsage, allocateBudget, antiPatternGateParams, appendRuntimeJournalEntry, appendUsageEvent, briefingMarkerPath, briefingMarkersDir, buildCodeMap, buildDocFrequency, buildFrontmatter, buildReport, bumpRead, codeMapPath, collectTimelineEntries, compareImpact, compileRegexSensor, computeImpact, configPath, contractLockPath, deriveConfidence, diffContract, diffHasDistinctiveOverlap, distinctiveCap, emptyUsage, emptyUsageIndex, enforcementDir, estimateTokens, evaluateSkillActivation, extractActionsBriefBody, extractSnippet, findLexicalConflictPairs, findProjectRoot, findTopicStatusConflictPairs, firstMemoryOneLine, getUsage, globToRegExp, hasRecentBriefingMarker, inferModulesFromPaths, isAutoPromoteEligible, isDecaying, isDistinctiveToken, isFreshIsoDate, isGlobPath, isLikelyGuessable, isRetiredMemory, isSkill, isSkillSuppressed, isStackPackSeed, listMarkdownFilesRecursive, literalMatchesAllTokens, literalMatchesAnyToken, loadCodeMap, loadConfig, loadConfigSync, loadMemoriesFromDir, loadMemory, loadUsageIndex, memoryFilePath, memoryMatchesAnchorPaths, newMemoryId, normalizeSessionId, overallScore, parseMemory, parseSince, pathsOverlap, pickSnippetNeedle, pullCrossRepoSources, queryCodeMap, rankMemoriesLexical, readRecentBriefingMarker, readRuntimeJournalTail, readUsageEvents, recordApplied, recordRejection, relPathFrom, resolveBriefingBudget, resolveHaivePaths, resolveManifestFiles, resolveProjectInfo, retirementSignal, runRegexSensor, runSensors, runtimeJournalPath, saveCodeMap, saveConfig, saveUsageIndex, scoreRetrievalCase, scoreSensorCase, sensorAppliesToPath, sensorTargetsFromDiff, serializeMemory, snapshotContract, specificityScore, stripPrivate, suggestSensorFromMemory, suggestTopicKey, summarizeImpact, synthesizeSelfEvalCases, titleFromBody, tokenizeQuery, tokenizeWords, trackDependencies, trackReads, truncateToTokens, 
usageLogPath, usageLogSize, usagePath, verifyAnchor, watchContracts, writeBriefingMarker };
|
package/dist/index.js
CHANGED
|
@@ -926,6 +926,85 @@ function isAutoPromoteEligible(fm, usage, rule = DEFAULT_AUTO_PROMOTE_RULE) {
|
|
|
926
926
|
return usage.read_count >= rule.minReads;
|
|
927
927
|
}
|
|
928
928
|
|
|
929
|
+
// src/distinctive.ts
|
|
930
|
+
/**
 * Language keywords and ubiquitous code words that are excluded from word-level
 * tokenization. Entries are lowercase because tokens are lowercased before lookup.
 * Insertion order is kept stable since the set is exported.
 */
var CODE_STOPWORDS = /* @__PURE__ */ new Set([
  // module syntax, declarations
  "import", "export", "function", "return", "const", "let", "var", "class",
  // visibility / member modifiers
  "public", "private", "protected", "static", "this",
  // literals
  "true", "false", "null", "undefined", "void",
  // async and type-level vocabulary
  "async", "await", "from", "type", "interface", "extends", "implements",
  // primitive type names and generic nouns
  "number", "string", "boolean", "value",
  // control flow
  "default", "case", "break", "continue", "throw", "catch", "finally", "else", "while", "for",
  // object model, generators, runtime globals
  "new", "super", "yield", "module", "require", "console"
]);

/** Minimum length a token must have to be kept for word-level matching. */
var MIN_WORD_LEN = 4;
|
|
978
|
+
/**
 * Split text into lowercase word tokens.
 *
 * The text is lowercased and split on every run of characters that are neither
 * letters nor digits; tokens shorter than `MIN_WORD_LEN` or listed in
 * `CODE_STOPWORDS` are dropped. Duplicates are preserved, in order of appearance.
 *
 * @param text Text to tokenize. Empty, null or undefined input yields no tokens.
 * @returns The surviving tokens.
 */
function tokenizeWords(text) {
  // Nothing to tokenize; also avoids calling `.toLowerCase()` on null/undefined.
  if (!text) return [];
  // Unicode-aware separator class: for ASCII input this splits exactly like
  // /[^a-z0-9]+/ on the lowercased text, but accented and non-Latin words stay
  // whole instead of being cut into fragments at every non-ASCII letter.
  return text
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u)
    .filter((t) => t.length >= MIN_WORD_LEN && !CODE_STOPWORDS.has(t));
}
|
|
981
|
+
/**
 * Build per-token document frequency across a corpus of text bodies.
 *
 * Each body is tokenized with `tokenizeWords`; a token is counted once per body
 * no matter how often it occurs inside that body.
 *
 * @param bodies The corpus, one string per document.
 * @returns `df` (token -> number of bodies containing it) and `total` (`bodies.length`).
 */
function buildDocFrequency(bodies) {
  const df = new Map();
  for (const body of bodies) {
    // De-duplicate within the document so repeated words count only once.
    new Set(tokenizeWords(body)).forEach((word) => {
      const seen = df.get(word);
      df.set(word, seen === undefined ? 1 : seen + 1);
    });
  }
  const total = bodies.length;
  return { df, total };
}
|
|
989
|
+
/**
 * Document-frequency cap at or below which a token counts as distinctive:
 * a tenth of the corpus size, rounded down, and never less than 1.
 *
 * @param total Number of documents in the corpus.
 * @returns The cap; 1 whenever a tenth of `total` is below 1 or `total` is not a number.
 */
function distinctiveCap(total) {
  const tenth = Math.floor(0.1 * total);
  // `Math.max(1, NaN)` evaluates to NaN, which would make every `df <= cap`
  // comparison false. Comparing explicitly lets NaN fall through to the floor of 1.
  return tenth >= 1 ? tenth : 1;
}
|
|
992
|
+
/**
 * Decide whether `token` is distinctive (rare) within the corpus described by `freq`.
 *
 * The token is lowercased first. Tokens shorter than `MIN_WORD_LEN` or present in
 * `CODE_STOPWORDS` are never distinctive. A token with no entry in `freq.df` is
 * distinctive; otherwise its document frequency must not exceed
 * `distinctiveCap(freq.total)`.
 *
 * @param token Candidate token (any casing).
 * @param freq Document-frequency table with `df` and `total`.
 * @returns `true` when the token is distinctive.
 */
function isDistinctiveToken(token, freq) {
  const word = token.toLowerCase();
  const eligible = word.length >= MIN_WORD_LEN && !CODE_STOPWORDS.has(word);
  if (!eligible) return false;
  const seenIn = freq.df.get(word);
  // The cap is only computed when the token actually has a recorded frequency.
  return seenIn === undefined || seenIn <= distinctiveCap(freq.total);
}
|
|
999
|
+
/**
 * Check whether the added diff text shares at least one distinctive word token
 * with a memory body.
 *
 * Both texts are tokenized with `tokenizeWords`; a shared token counts only when
 * `isDistinctiveToken` accepts it for the given frequency table.
 *
 * @param addedDiffText Text of the added diff lines.
 * @param memoryBody Body text of the memory being checked.
 * @param freq Document-frequency table used to judge distinctiveness.
 * @returns `true` on the first shared distinctive token, `false` otherwise
 *   (including when the memory body yields no tokens).
 */
function diffHasDistinctiveOverlap(addedDiffText, memoryBody, freq) {
  const bodyWords = new Set(tokenizeWords(memoryBody));
  // Without any usable memory token there is nothing to overlap with.
  if (bodyWords.size === 0) return false;
  const diffWords = new Set(tokenizeWords(addedDiffText));
  return [...diffWords].some((word) => bodyWords.has(word) && isDistinctiveToken(word, freq));
}
|
|
1007
|
+
|
|
929
1008
|
// src/skill-activation.ts
|
|
930
1009
|
function isSkill(fm) {
|
|
931
1010
|
return fm.type === "skill";
|
|
@@ -2894,7 +2973,7 @@ function pickLowercaseValuePattern(text) {
|
|
|
2894
2973
|
for (const match of text.matchAll(/\blowercase\s+([A-Za-z][A-Za-z0-9_.:-]{2,79})\s+([a-z][a-z0-9_.:-]{1,40})\b/g)) {
|
|
2895
2974
|
const key = match[1] ?? "";
|
|
2896
2975
|
const value = match[2] ?? "";
|
|
2897
|
-
if (!
|
|
2976
|
+
if (!isDistinctiveToken2(key, true) || isBoringValue(value)) continue;
|
|
2898
2977
|
candidates.push({
|
|
2899
2978
|
label: `${key}=${value}`,
|
|
2900
2979
|
pattern: `${escapeRegExp(key)}\\s*[:=]\\s*["']?${escapeRegExp(value)}["']?`,
|
|
@@ -2910,7 +2989,7 @@ function pickAssignmentPattern(text) {
|
|
|
2910
2989
|
const key = match[1] ?? "";
|
|
2911
2990
|
const operator = match[2] ?? "";
|
|
2912
2991
|
const value = match[3] ?? "";
|
|
2913
|
-
if (!
|
|
2992
|
+
if (!isDistinctiveToken2(key, true) || isBoringValue(value)) continue;
|
|
2914
2993
|
const label = `${key}${operator}${value}`;
|
|
2915
2994
|
candidates.push({
|
|
2916
2995
|
label,
|
|
@@ -2941,7 +3020,7 @@ function pickDistinctiveToken(text) {
|
|
|
2941
3020
|
const raw = (match[1] ?? match[2] ?? match[3] ?? "").trim();
|
|
2942
3021
|
const token = raw.replace(/^[^\w.-]+|[^\w.-]+$/g, "");
|
|
2943
3022
|
const isCodeLike = Boolean(match[1] ?? match[2]);
|
|
2944
|
-
if (!
|
|
3023
|
+
if (!isDistinctiveToken2(token, isCodeLike)) continue;
|
|
2945
3024
|
const key = token.toLowerCase();
|
|
2946
3025
|
const codeSpanBonus = match[1] ? 20 : match[2] ? 8 : 0;
|
|
2947
3026
|
const shapeBonus = /[-_.:]/.test(token) ? 3 : /[A-Z]/.test(token.slice(1)) ? 2 : /\d/.test(token) ? 1 : 0;
|
|
@@ -2952,7 +3031,7 @@ function pickDistinctiveToken(text) {
|
|
|
2952
3031
|
const best = [...candidates.values()].sort((a, b) => b.score - a.score)[0];
|
|
2953
3032
|
return best?.raw ?? null;
|
|
2954
3033
|
}
|
|
2955
|
-
function
|
|
3034
|
+
function isDistinctiveToken2(token, isCodeLike) {
|
|
2956
3035
|
if (token.length < 4 || token.length > 80) return false;
|
|
2957
3036
|
if (/^https?:\/\//i.test(token)) return false;
|
|
2958
3037
|
if (/^\d+$/.test(token)) return false;
|
|
@@ -2985,6 +3064,7 @@ export {
|
|
|
2985
3064
|
BRIEFING_PRESET_DEFAULTS,
|
|
2986
3065
|
CHARS_PER_TOKEN,
|
|
2987
3066
|
CODE_MAP_FILE,
|
|
3067
|
+
CODE_STOPWORDS,
|
|
2988
3068
|
CONFIG_FILE,
|
|
2989
3069
|
CrossRepoProvenanceSchema,
|
|
2990
3070
|
DECAY_DAYS,
|
|
@@ -2995,6 +3075,7 @@ export {
|
|
|
2995
3075
|
GUESSABLE_THRESHOLD,
|
|
2996
3076
|
HAIVE_DIR,
|
|
2997
3077
|
MEMORIES_DIR,
|
|
3078
|
+
MIN_WORD_LEN,
|
|
2998
3079
|
MemoryFrontmatterSchema,
|
|
2999
3080
|
MemoryScopeSchema,
|
|
3000
3081
|
MemoryStatusSchema,
|
|
@@ -3018,6 +3099,7 @@ export {
|
|
|
3018
3099
|
briefingMarkerPath,
|
|
3019
3100
|
briefingMarkersDir,
|
|
3020
3101
|
buildCodeMap,
|
|
3102
|
+
buildDocFrequency,
|
|
3021
3103
|
buildFrontmatter,
|
|
3022
3104
|
buildReport,
|
|
3023
3105
|
bumpRead,
|
|
@@ -3030,6 +3112,8 @@ export {
|
|
|
3030
3112
|
contractLockPath,
|
|
3031
3113
|
deriveConfidence,
|
|
3032
3114
|
diffContract,
|
|
3115
|
+
diffHasDistinctiveOverlap,
|
|
3116
|
+
distinctiveCap,
|
|
3033
3117
|
emptyUsage,
|
|
3034
3118
|
emptyUsageIndex,
|
|
3035
3119
|
enforcementDir,
|
|
@@ -3047,6 +3131,7 @@ export {
|
|
|
3047
3131
|
inferModulesFromPaths,
|
|
3048
3132
|
isAutoPromoteEligible,
|
|
3049
3133
|
isDecaying,
|
|
3134
|
+
isDistinctiveToken,
|
|
3050
3135
|
isFreshIsoDate,
|
|
3051
3136
|
isGlobPath,
|
|
3052
3137
|
isLikelyGuessable,
|
|
@@ -3106,6 +3191,7 @@ export {
|
|
|
3106
3191
|
synthesizeSelfEvalCases,
|
|
3107
3192
|
titleFromBody,
|
|
3108
3193
|
tokenizeQuery,
|
|
3194
|
+
tokenizeWords,
|
|
3109
3195
|
trackDependencies,
|
|
3110
3196
|
trackReads,
|
|
3111
3197
|
truncateToTokens,
|