@hiveai/core 0.11.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -718,6 +718,55 @@ declare function isGlobPath(p: string): boolean;
718
718
  declare function globToRegExp(pattern: string): RegExp;
719
719
  declare function relPathFrom(root: string, abs: string): string;
720
720
 
721
+ /**
722
+ * Distinctive-token corroboration for the anti-pattern gate.
723
+ *
724
+ * The pre-commit gate used to hard-block whenever a diff shared ANY ≥4-char token
725
+ * with an anchored gotcha's body. That fires on ubiquitous domain words ("memory",
726
+ * "sensor", "scope", "input", "version") and on version-bump diffs — blocking agents
727
+ * for nothing. The fix: a `literal` overlap only corroborates a BLOCK when at least
728
+ * one shared token is *distinctive* to that gotcha — i.e. rare across the gotcha
729
+ * corpus (low document frequency), like `BigInt`, `open-in-view`, `rec_7`. Common
730
+ * words can still surface the warning for review; they just can't hard-block.
731
+ *
732
+ * Pure module (no I/O), TF-IDF-style. Unit-tested in `test/distinctive.test.ts`.
733
+ */
734
+ /**
735
+ * Language keywords + ubiquitous code words that would match almost any memory body
736
+ * and so carry no distinguishing signal. Shared by the diff tokenizer and the
737
+ * distinctiveness check so "literal" stays meaningful.
738
+ */
739
+ declare const CODE_STOPWORDS: Set<string>;
740
+ /** Minimum token length kept for word-level matching (shorter tokens are too noisy). */
741
+ declare const MIN_WORD_LEN = 4;
742
+ /** Split text into lowercase word tokens (>= MIN_WORD_LEN, excluding code stopwords). */
743
+ declare function tokenizeWords(text: string): string[];
744
+ interface DocFrequency {
745
+ /** token -> number of documents (memory bodies) it appears in */
746
+ df: Map<string, number>;
747
+ /** total number of documents */
748
+ total: number;
749
+ }
750
+ /** Build per-token document frequency across a corpus of memory bodies. */
751
+ declare function buildDocFrequency(bodies: string[]): DocFrequency;
752
+ /**
753
+ * Document-frequency cap at/below which a token counts as distinctive. Deliberately
754
+ * strict — "distinctive" means *rare* (≈ the bottom 10% of the corpus), with a floor
755
+ * of 1 so a token appearing in a single memory is always distinctive. Strictness is
756
+ * intentional: blocking is the aggressive action, so we under-block rather than fire
757
+ * on a word that several gotchas happen to share.
758
+ */
759
+ declare function distinctiveCap(total: number): number;
760
+ /** True when `token` is distinctive (rare) within the corpus. */
761
+ declare function isDistinctiveToken(token: string, freq: DocFrequency): boolean;
762
+ /**
763
+ * True when the added diff text shares at least one *distinctive* word token with the
764
+ * memory body. This is the precise corroboration the block decision should require:
765
+ * "the change actually contains the specific thing this gotcha warns about", not
766
+ * "the change happens to mention a common domain word".
767
+ */
768
+ declare function diffHasDistinctiveOverlap(addedDiffText: string, memoryBody: string, freq: DocFrequency): boolean;
769
+
721
770
  /**
722
771
  * Progressive disclosure for `skill` memories.
723
772
  *
@@ -1417,4 +1466,4 @@ interface SensorSuggestionOptions {
1417
1466
  */
1418
1467
  declare function suggestSensorFromMemory(body: string, anchorPaths: string[], options?: SensorSuggestionOptions): Sensor | null;
1419
1468
 
1420
- export { AUTOPILOT_DEFAULTS, type Activation, type ActivationContext, ActivationSchema, type Anchor, AnchorSchema, type AntiPatternGate, type AutoPromoteRule, BRIEFING_MARKER_TTL_MS, BRIEFING_PRESET_DEFAULTS, type BreakingChange, type BriefingBudgetNumbers, type BriefingBudgetPreset, type BriefingMarker, type BudgetPart, type BudgetSlice, type BuildCodeMapOptions, CHARS_PER_TOKEN, CODE_MAP_FILE, CONFIG_FILE, type CodeExport, type CodeExportKind, type CodeFileEntry, type CodeMap, type CodeMapQueryOptions, type CollectTimelineOpts, type ConfidenceLevel, type ConfidenceThresholds, type ConflictCandidatePair, type ConflictCandidatesOpts, type ContractDiffResult, type ContractFile, type ContractSnapshot, CrossRepoProvenanceSchema, type CrossRepoReport, type CrossRepoSource, DECAY_DAYS, DEFAULT_AUTO_PROMOTE_RULE, DEFAULT_CONFIDENCE_THRESHOLDS, DEFAULT_CONFIG, DEFAULT_DORMANT_DAYS, type DepChange, type DepTrackResult, type DependencySnapshot, type EvalReport, type EvalSpec, GUESSABLE_THRESHOLD, HAIVE_DIR, type HaiveConfig, type HaivePaths, type ImpactOptions, type ImpactScore, type ImpactSummary, type ImpactTier, type LexicalRankResult, type LoadedMemory, MEMORIES_DIR, type Memory, type MemoryFrontmatter, MemoryFrontmatterSchema, type MemoryScope, MemoryScopeSchema, type MemoryStatus, MemoryStatusSchema, type MemoryType, MemoryTypeSchema, type MemoryUsage, PROJECT_CONTEXT_FILE, RUNTIME_JOURNAL_FILENAME, type ResolveProjectInfo, type RetirementSignal, type RetrievalAggregate, type RetrievalCase, type RetrievalCaseResult, type RuntimeJournalEntry, SESSION_RECAP_TTL_MS, STACK_PACK_TAG, type SelfEvalOptions, type Sensor, type SensorAggregate, type SensorCase, type SensorCaseResult, type SensorHit, SensorSchema, type SensorSuggestionOptions, type SensorTarget, type SkillActivation, type TimelineEntry, type TopicStatusPair, type TruncateOptions, type TruncateResult, USAGE_FILE, USAGE_LOG_DIR, USAGE_LOG_FILE, type UsageAggregate, type UsageEvent, type UsageIndex, type VerifyOptions, type VerifyResult, addedLinesFromDiff, aggregateRetrieval, aggregateSensors, aggregateUsage, allocateBudget, antiPatternGateParams, appendRuntimeJournalEntry, appendUsageEvent, briefingMarkerPath, briefingMarkersDir, buildCodeMap, buildFrontmatter, buildReport, bumpRead, codeMapPath, collectTimelineEntries, compareImpact, compileRegexSensor, computeImpact, configPath, contractLockPath, deriveConfidence, diffContract, emptyUsage, emptyUsageIndex, enforcementDir, estimateTokens, evaluateSkillActivation, extractActionsBriefBody, extractSnippet, findLexicalConflictPairs, findProjectRoot, findTopicStatusConflictPairs, firstMemoryOneLine, getUsage, globToRegExp, hasRecentBriefingMarker, inferModulesFromPaths, isAutoPromoteEligible, isDecaying, isFreshIsoDate, isGlobPath, isLikelyGuessable, isRetiredMemory, isSkill, isSkillSuppressed, isStackPackSeed, listMarkdownFilesRecursive, literalMatchesAllTokens, literalMatchesAnyToken, loadCodeMap, loadConfig, loadConfigSync, loadMemoriesFromDir, loadMemory, loadUsageIndex, memoryFilePath, memoryMatchesAnchorPaths, newMemoryId, normalizeSessionId, overallScore, parseMemory, parseSince, pathsOverlap, pickSnippetNeedle, pullCrossRepoSources, queryCodeMap, rankMemoriesLexical, readRecentBriefingMarker, readRuntimeJournalTail, readUsageEvents, recordApplied, recordRejection, relPathFrom, resolveBriefingBudget, resolveHaivePaths, resolveManifestFiles, resolveProjectInfo, retirementSignal, runRegexSensor, runSensors, runtimeJournalPath, saveCodeMap, saveConfig, saveUsageIndex, scoreRetrievalCase, scoreSensorCase, sensorAppliesToPath, sensorTargetsFromDiff, serializeMemory, snapshotContract, specificityScore, stripPrivate, suggestSensorFromMemory, suggestTopicKey, summarizeImpact, synthesizeSelfEvalCases, titleFromBody, tokenizeQuery, trackDependencies, trackReads, truncateToTokens, usageLogPath, usageLogSize, usagePath, verifyAnchor, watchContracts, writeBriefingMarker };
1469
+ export { AUTOPILOT_DEFAULTS, type Activation, type ActivationContext, ActivationSchema, type Anchor, AnchorSchema, type AntiPatternGate, type AutoPromoteRule, BRIEFING_MARKER_TTL_MS, BRIEFING_PRESET_DEFAULTS, type BreakingChange, type BriefingBudgetNumbers, type BriefingBudgetPreset, type BriefingMarker, type BudgetPart, type BudgetSlice, type BuildCodeMapOptions, CHARS_PER_TOKEN, CODE_MAP_FILE, CODE_STOPWORDS, CONFIG_FILE, type CodeExport, type CodeExportKind, type CodeFileEntry, type CodeMap, type CodeMapQueryOptions, type CollectTimelineOpts, type ConfidenceLevel, type ConfidenceThresholds, type ConflictCandidatePair, type ConflictCandidatesOpts, type ContractDiffResult, type ContractFile, type ContractSnapshot, CrossRepoProvenanceSchema, type CrossRepoReport, type CrossRepoSource, DECAY_DAYS, DEFAULT_AUTO_PROMOTE_RULE, DEFAULT_CONFIDENCE_THRESHOLDS, DEFAULT_CONFIG, DEFAULT_DORMANT_DAYS, type DepChange, type DepTrackResult, type DependencySnapshot, type DocFrequency, type EvalReport, type EvalSpec, GUESSABLE_THRESHOLD, HAIVE_DIR, type HaiveConfig, type HaivePaths, type ImpactOptions, type ImpactScore, type ImpactSummary, type ImpactTier, type LexicalRankResult, type LoadedMemory, MEMORIES_DIR, MIN_WORD_LEN, type Memory, type MemoryFrontmatter, MemoryFrontmatterSchema, type MemoryScope, MemoryScopeSchema, type MemoryStatus, MemoryStatusSchema, type MemoryType, MemoryTypeSchema, type MemoryUsage, PROJECT_CONTEXT_FILE, RUNTIME_JOURNAL_FILENAME, type ResolveProjectInfo, type RetirementSignal, type RetrievalAggregate, type RetrievalCase, type RetrievalCaseResult, type RuntimeJournalEntry, SESSION_RECAP_TTL_MS, STACK_PACK_TAG, type SelfEvalOptions, type Sensor, type SensorAggregate, type SensorCase, type SensorCaseResult, type SensorHit, SensorSchema, type SensorSuggestionOptions, type SensorTarget, type SkillActivation, type TimelineEntry, type TopicStatusPair, type TruncateOptions, type TruncateResult, USAGE_FILE, USAGE_LOG_DIR, USAGE_LOG_FILE, type UsageAggregate, type UsageEvent, type UsageIndex, type VerifyOptions, type VerifyResult, addedLinesFromDiff, aggregateRetrieval, aggregateSensors, aggregateUsage, allocateBudget, antiPatternGateParams, appendRuntimeJournalEntry, appendUsageEvent, briefingMarkerPath, briefingMarkersDir, buildCodeMap, buildDocFrequency, buildFrontmatter, buildReport, bumpRead, codeMapPath, collectTimelineEntries, compareImpact, compileRegexSensor, computeImpact, configPath, contractLockPath, deriveConfidence, diffContract, diffHasDistinctiveOverlap, distinctiveCap, emptyUsage, emptyUsageIndex, enforcementDir, estimateTokens, evaluateSkillActivation, extractActionsBriefBody, extractSnippet, findLexicalConflictPairs, findProjectRoot, findTopicStatusConflictPairs, firstMemoryOneLine, getUsage, globToRegExp, hasRecentBriefingMarker, inferModulesFromPaths, isAutoPromoteEligible, isDecaying, isDistinctiveToken, isFreshIsoDate, isGlobPath, isLikelyGuessable, isRetiredMemory, isSkill, isSkillSuppressed, isStackPackSeed, listMarkdownFilesRecursive, literalMatchesAllTokens, literalMatchesAnyToken, loadCodeMap, loadConfig, loadConfigSync, loadMemoriesFromDir, loadMemory, loadUsageIndex, memoryFilePath, memoryMatchesAnchorPaths, newMemoryId, normalizeSessionId, overallScore, parseMemory, parseSince, pathsOverlap, pickSnippetNeedle, pullCrossRepoSources, queryCodeMap, rankMemoriesLexical, readRecentBriefingMarker, readRuntimeJournalTail, readUsageEvents, recordApplied, recordRejection, relPathFrom, resolveBriefingBudget, resolveHaivePaths, resolveManifestFiles, resolveProjectInfo, retirementSignal, runRegexSensor, runSensors, runtimeJournalPath, saveCodeMap, saveConfig, saveUsageIndex, scoreRetrievalCase, scoreSensorCase, sensorAppliesToPath, sensorTargetsFromDiff, serializeMemory, snapshotContract, specificityScore, stripPrivate, suggestSensorFromMemory, suggestTopicKey, summarizeImpact, synthesizeSelfEvalCases, titleFromBody, tokenizeQuery, tokenizeWords, trackDependencies, trackReads, truncateToTokens, usageLogPath, usageLogSize, usagePath, verifyAnchor, watchContracts, writeBriefingMarker };
package/dist/index.js CHANGED
@@ -926,6 +926,85 @@ function isAutoPromoteEligible(fm, usage, rule = DEFAULT_AUTO_PROMOTE_RULE) {
926
926
  return usage.read_count >= rule.minReads;
927
927
  }
928
928
 
929
+ // src/distinctive.ts
930
+ var CODE_STOPWORDS = /* @__PURE__ */ new Set([
931
+ "import",
932
+ "export",
933
+ "function",
934
+ "return",
935
+ "const",
936
+ "let",
937
+ "var",
938
+ "class",
939
+ "public",
940
+ "private",
941
+ "protected",
942
+ "static",
943
+ "this",
944
+ "true",
945
+ "false",
946
+ "null",
947
+ "undefined",
948
+ "void",
949
+ "async",
950
+ "await",
951
+ "from",
952
+ "type",
953
+ "interface",
954
+ "extends",
955
+ "implements",
956
+ "number",
957
+ "string",
958
+ "boolean",
959
+ "value",
960
+ "default",
961
+ "case",
962
+ "break",
963
+ "continue",
964
+ "throw",
965
+ "catch",
966
+ "finally",
967
+ "else",
968
+ "while",
969
+ "for",
970
+ "new",
971
+ "super",
972
+ "yield",
973
+ "module",
974
+ "require",
975
+ "console"
976
+ ]);
977
+ var MIN_WORD_LEN = 4;
978
+ function tokenizeWords(text) {
979
+ return text.toLowerCase().split(/[^a-z0-9]+/).filter((t) => t.length >= MIN_WORD_LEN && !CODE_STOPWORDS.has(t));
980
+ }
981
+ function buildDocFrequency(bodies) {
982
+ const df = /* @__PURE__ */ new Map();
983
+ for (const body of bodies) {
984
+ const unique = new Set(tokenizeWords(body));
985
+ for (const tok of unique) df.set(tok, (df.get(tok) ?? 0) + 1);
986
+ }
987
+ return { df, total: bodies.length };
988
+ }
989
+ function distinctiveCap(total) {
990
+ return Math.max(1, Math.floor(0.1 * total));
991
+ }
992
+ function isDistinctiveToken(token, freq) {
993
+ const tok = token.toLowerCase();
994
+ if (tok.length < MIN_WORD_LEN || CODE_STOPWORDS.has(tok)) return false;
995
+ const df = freq.df.get(tok);
996
+ if (df === void 0) return true;
997
+ return df <= distinctiveCap(freq.total);
998
+ }
999
+ function diffHasDistinctiveOverlap(addedDiffText, memoryBody, freq) {
1000
+ const memoryTokens = new Set(tokenizeWords(memoryBody));
1001
+ if (memoryTokens.size === 0) return false;
1002
+ for (const tok of new Set(tokenizeWords(addedDiffText))) {
1003
+ if (memoryTokens.has(tok) && isDistinctiveToken(tok, freq)) return true;
1004
+ }
1005
+ return false;
1006
+ }
1007
+
929
1008
  // src/skill-activation.ts
930
1009
  function isSkill(fm) {
931
1010
  return fm.type === "skill";
@@ -2894,7 +2973,7 @@ function pickLowercaseValuePattern(text) {
2894
2973
  for (const match of text.matchAll(/\blowercase\s+([A-Za-z][A-Za-z0-9_.:-]{2,79})\s+([a-z][a-z0-9_.:-]{1,40})\b/g)) {
2895
2974
  const key = match[1] ?? "";
2896
2975
  const value = match[2] ?? "";
2897
- if (!isDistinctiveToken(key, true) || isBoringValue(value)) continue;
2976
+ if (!isDistinctiveToken2(key, true) || isBoringValue(value)) continue;
2898
2977
  candidates.push({
2899
2978
  label: `${key}=${value}`,
2900
2979
  pattern: `${escapeRegExp(key)}\\s*[:=]\\s*["']?${escapeRegExp(value)}["']?`,
@@ -2910,7 +2989,7 @@ function pickAssignmentPattern(text) {
2910
2989
  const key = match[1] ?? "";
2911
2990
  const operator = match[2] ?? "";
2912
2991
  const value = match[3] ?? "";
2913
- if (!isDistinctiveToken(key, true) || isBoringValue(value)) continue;
2992
+ if (!isDistinctiveToken2(key, true) || isBoringValue(value)) continue;
2914
2993
  const label = `${key}${operator}${value}`;
2915
2994
  candidates.push({
2916
2995
  label,
@@ -2941,7 +3020,7 @@ function pickDistinctiveToken(text) {
2941
3020
  const raw = (match[1] ?? match[2] ?? match[3] ?? "").trim();
2942
3021
  const token = raw.replace(/^[^\w.-]+|[^\w.-]+$/g, "");
2943
3022
  const isCodeLike = Boolean(match[1] ?? match[2]);
2944
- if (!isDistinctiveToken(token, isCodeLike)) continue;
3023
+ if (!isDistinctiveToken2(token, isCodeLike)) continue;
2945
3024
  const key = token.toLowerCase();
2946
3025
  const codeSpanBonus = match[1] ? 20 : match[2] ? 8 : 0;
2947
3026
  const shapeBonus = /[-_.:]/.test(token) ? 3 : /[A-Z]/.test(token.slice(1)) ? 2 : /\d/.test(token) ? 1 : 0;
@@ -2952,7 +3031,7 @@ function pickDistinctiveToken(text) {
2952
3031
  const best = [...candidates.values()].sort((a, b) => b.score - a.score)[0];
2953
3032
  return best?.raw ?? null;
2954
3033
  }
2955
- function isDistinctiveToken(token, isCodeLike) {
3034
+ function isDistinctiveToken2(token, isCodeLike) {
2956
3035
  if (token.length < 4 || token.length > 80) return false;
2957
3036
  if (/^https?:\/\//i.test(token)) return false;
2958
3037
  if (/^\d+$/.test(token)) return false;
@@ -2985,6 +3064,7 @@ export {
2985
3064
  BRIEFING_PRESET_DEFAULTS,
2986
3065
  CHARS_PER_TOKEN,
2987
3066
  CODE_MAP_FILE,
3067
+ CODE_STOPWORDS,
2988
3068
  CONFIG_FILE,
2989
3069
  CrossRepoProvenanceSchema,
2990
3070
  DECAY_DAYS,
@@ -2995,6 +3075,7 @@ export {
2995
3075
  GUESSABLE_THRESHOLD,
2996
3076
  HAIVE_DIR,
2997
3077
  MEMORIES_DIR,
3078
+ MIN_WORD_LEN,
2998
3079
  MemoryFrontmatterSchema,
2999
3080
  MemoryScopeSchema,
3000
3081
  MemoryStatusSchema,
@@ -3018,6 +3099,7 @@ export {
3018
3099
  briefingMarkerPath,
3019
3100
  briefingMarkersDir,
3020
3101
  buildCodeMap,
3102
+ buildDocFrequency,
3021
3103
  buildFrontmatter,
3022
3104
  buildReport,
3023
3105
  bumpRead,
@@ -3030,6 +3112,8 @@ export {
3030
3112
  contractLockPath,
3031
3113
  deriveConfidence,
3032
3114
  diffContract,
3115
+ diffHasDistinctiveOverlap,
3116
+ distinctiveCap,
3033
3117
  emptyUsage,
3034
3118
  emptyUsageIndex,
3035
3119
  enforcementDir,
@@ -3047,6 +3131,7 @@ export {
3047
3131
  inferModulesFromPaths,
3048
3132
  isAutoPromoteEligible,
3049
3133
  isDecaying,
3134
+ isDistinctiveToken,
3050
3135
  isFreshIsoDate,
3051
3136
  isGlobPath,
3052
3137
  isLikelyGuessable,
@@ -3106,6 +3191,7 @@ export {
3106
3191
  synthesizeSelfEvalCases,
3107
3192
  titleFromBody,
3108
3193
  tokenizeQuery,
3194
+ tokenizeWords,
3109
3195
  trackDependencies,
3110
3196
  trackReads,
3111
3197
  truncateToTokens,