uilint-duplicates 0.2.122 → 0.2.124
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-54SLRAFO.js → chunk-BAZQUJDS.js} +622 -15
- package/dist/chunk-BAZQUJDS.js.map +1 -0
- package/dist/index.d.ts +344 -8
- package/dist/index.js +53 -1
- package/dist/node.d.ts +1 -1
- package/dist/node.js +53 -1
- package/package.json +2 -2
- package/dist/chunk-54SLRAFO.js.map +0 -1
package/dist/index.d.ts
CHANGED
|
@@ -510,12 +510,18 @@ interface IndexOptions {
|
|
|
510
510
|
interface FindDuplicatesOptions$1 {
|
|
511
511
|
/** Path to search (defaults to current directory) */
|
|
512
512
|
path?: string;
|
|
513
|
-
/** Minimum similarity threshold (0-1). Default: 0.85 */
|
|
513
|
+
/** Minimum similarity threshold (0-1). Default: 0.75 */
|
|
514
514
|
threshold?: number;
|
|
515
515
|
/** Minimum group size. Default: 2 */
|
|
516
516
|
minGroupSize?: number;
|
|
517
517
|
/** Filter by kind: component, hook, function */
|
|
518
518
|
kind?: ChunkKind;
|
|
519
|
+
/** Minimum confidence level: "high", "medium", "low". Default: "low" */
|
|
520
|
+
confidenceLevel?: "high" | "medium" | "low";
|
|
521
|
+
/** Use structural similarity boost. Default: true */
|
|
522
|
+
useStructuralBoost?: boolean;
|
|
523
|
+
/** Include duplicates within the same file. Default: true */
|
|
524
|
+
includeSameFile?: boolean;
|
|
519
525
|
}
|
|
520
526
|
interface SearchOptions {
|
|
521
527
|
/** Path to search (defaults to current directory) */
|
|
@@ -556,6 +562,8 @@ interface DuplicateGroup$1 {
|
|
|
556
562
|
avgSimilarity: number;
|
|
557
563
|
/** The kind of code in this group */
|
|
558
564
|
kind: ChunkKind;
|
|
565
|
+
/** Confidence level for this group */
|
|
566
|
+
confidence: "high" | "medium" | "low";
|
|
559
567
|
}
|
|
560
568
|
interface SearchResult {
|
|
561
569
|
/** File path */
|
|
@@ -607,10 +615,101 @@ declare function getIndexStats(path?: string): Promise<{
|
|
|
607
615
|
lastUpdated: string | null;
|
|
608
616
|
}>;
|
|
609
617
|
|
|
618
|
+
/**
|
|
619
|
+
* Confidence Level System
|
|
620
|
+
*
|
|
621
|
+
* Provides confidence levels for duplicate detection results
|
|
622
|
+
* to help users prioritize which duplicates to address.
|
|
623
|
+
*
|
|
624
|
+
* Confidence levels:
|
|
625
|
+
* - HIGH: Likely copy-paste or near-identical implementation. Should consolidate.
|
|
626
|
+
* - MEDIUM: Semantically similar code. Worth reviewing for potential abstraction.
|
|
627
|
+
* - LOW: Possibly related patterns. Optional to review.
|
|
628
|
+
*/
|
|
629
|
+
type ConfidenceLevel = "high" | "medium" | "low";
|
|
630
|
+
interface ConfidenceConfig {
|
|
631
|
+
/** Threshold for high confidence (default: 0.90) */
|
|
632
|
+
highThreshold: number;
|
|
633
|
+
/** Threshold for medium confidence (default: 0.75) */
|
|
634
|
+
mediumThreshold: number;
|
|
635
|
+
/** Threshold for low confidence / minimum reporting (default: 0.60) */
|
|
636
|
+
lowThreshold: number;
|
|
637
|
+
}
|
|
638
|
+
declare const DEFAULT_CONFIDENCE_CONFIG: ConfidenceConfig;
|
|
639
|
+
interface ConfidenceResult {
|
|
640
|
+
/** The confidence level */
|
|
641
|
+
level: ConfidenceLevel;
|
|
642
|
+
/** The raw similarity score (0-1) */
|
|
643
|
+
score: number;
|
|
644
|
+
/** Human-readable description of what this confidence level means */
|
|
645
|
+
description: string;
|
|
646
|
+
/** Recommended action for the user */
|
|
647
|
+
action: string;
|
|
648
|
+
/** Color for CLI/UI display */
|
|
649
|
+
color: "red" | "yellow" | "green";
|
|
650
|
+
}
|
|
651
|
+
/**
|
|
652
|
+
* Determine confidence level from a similarity score.
|
|
653
|
+
*
|
|
654
|
+
* @param score Similarity score between 0 and 1
|
|
655
|
+
* @param config Optional custom threshold configuration
|
|
656
|
+
* @returns The confidence level
|
|
657
|
+
*/
|
|
658
|
+
declare function getConfidenceLevel(score: number, config?: ConfidenceConfig): ConfidenceLevel;
|
|
659
|
+
/**
|
|
660
|
+
* Check if a score meets the minimum threshold for reporting.
|
|
661
|
+
*
|
|
662
|
+
* @param score Similarity score
|
|
663
|
+
* @param config Optional custom threshold configuration
|
|
664
|
+
* @returns True if the score should be reported
|
|
665
|
+
*/
|
|
666
|
+
declare function meetsMinimumThreshold(score: number, config?: ConfidenceConfig): boolean;
|
|
667
|
+
/**
|
|
668
|
+
* Get detailed confidence result with actionable guidance.
|
|
669
|
+
*
|
|
670
|
+
* @param score Similarity score between 0 and 1
|
|
671
|
+
* @param config Optional custom threshold configuration
|
|
672
|
+
* @returns Full confidence result with guidance
|
|
673
|
+
*/
|
|
674
|
+
declare function getConfidenceResult(score: number, config?: ConfidenceConfig): ConfidenceResult;
|
|
675
|
+
/**
|
|
676
|
+
* Get emoji indicator for confidence level (for CLI output).
|
|
677
|
+
*/
|
|
678
|
+
declare function getConfidenceEmoji(level: ConfidenceLevel): string;
|
|
679
|
+
/**
|
|
680
|
+
* Get ANSI color code for confidence level (for CLI output).
|
|
681
|
+
*/
|
|
682
|
+
declare function getConfidenceAnsiColor(level: ConfidenceLevel): string;
|
|
683
|
+
/**
|
|
684
|
+
* Format confidence for display in CLI.
|
|
685
|
+
*
|
|
686
|
+
* @param result Confidence result to format
|
|
687
|
+
* @param useEmoji Whether to include emoji (default: true)
|
|
688
|
+
* @param useColor Whether to include ANSI colors (default: false)
|
|
689
|
+
* @returns Formatted string
|
|
690
|
+
*/
|
|
691
|
+
declare function formatConfidence(result: ConfidenceResult, useEmoji?: boolean, useColor?: boolean): string;
|
|
692
|
+
/**
|
|
693
|
+
* Format confidence with full details for verbose output.
|
|
694
|
+
*/
|
|
695
|
+
declare function formatConfidenceVerbose(result: ConfidenceResult): string;
|
|
696
|
+
/**
|
|
697
|
+
* Compare two confidence levels.
|
|
698
|
+
* Returns: -1 if a < b, 0 if equal, 1 if a > b
|
|
699
|
+
*/
|
|
700
|
+
declare function compareConfidenceLevels(a: ConfidenceLevel, b: ConfidenceLevel): number;
|
|
701
|
+
/**
|
|
702
|
+
* Filter results by minimum confidence level.
|
|
703
|
+
*/
|
|
704
|
+
declare function filterByConfidence<T extends {
|
|
705
|
+
score: number;
|
|
706
|
+
}>(results: T[], minLevel: ConfidenceLevel, config?: ConfidenceConfig): T[];
|
|
707
|
+
|
|
610
708
|
/**
|
|
611
709
|
* Duplicate Finder
|
|
612
710
|
*
|
|
613
711
|
* Finds groups of semantically similar code chunks using the vector index.
|
|
712
|
+
* Enhanced with structural similarity scoring and confidence levels.
|
|
614
713
|
*/
|
|
615
714
|
|
|
616
715
|
interface DuplicateMember {
|
|
@@ -618,8 +717,14 @@ interface DuplicateMember {
|
|
|
618
717
|
id: string;
|
|
619
718
|
/** Chunk metadata */
|
|
620
719
|
metadata: StoredChunkMetadata;
|
|
621
|
-
/** Similarity score to the group centroid/first member */
|
|
720
|
+
/** Similarity score to the group centroid/first member (semantic only) */
|
|
622
721
|
score: number;
|
|
722
|
+
/** Combined score (semantic + structural) */
|
|
723
|
+
combinedScore?: number;
|
|
724
|
+
/** Structural similarity score */
|
|
725
|
+
structuralScore?: number;
|
|
726
|
+
/** Confidence level for this match */
|
|
727
|
+
confidence?: ConfidenceLevel;
|
|
623
728
|
}
|
|
624
729
|
interface DuplicateGroup {
|
|
625
730
|
/** Members of the duplicate group */
|
|
@@ -628,9 +733,11 @@ interface DuplicateGroup {
|
|
|
628
733
|
avgSimilarity: number;
|
|
629
734
|
/** The kind of code in this group (component, hook, function) */
|
|
630
735
|
kind: ChunkKind;
|
|
736
|
+
/** Overall confidence level for the group */
|
|
737
|
+
confidence: ConfidenceLevel;
|
|
631
738
|
}
|
|
632
739
|
interface FindDuplicatesOptions {
|
|
633
|
-
/** Minimum cosine similarity threshold (0-1). Default: 0.85 */
|
|
740
|
+
/** Minimum cosine similarity threshold (0-1). Default: 0.75 (lowered from 0.85) */
|
|
634
741
|
threshold?: number;
|
|
635
742
|
/** Minimum group size. Default: 2 */
|
|
636
743
|
minGroupSize?: number;
|
|
@@ -638,6 +745,12 @@ interface FindDuplicatesOptions {
|
|
|
638
745
|
kind?: ChunkKind;
|
|
639
746
|
/** Exclude specific file paths */
|
|
640
747
|
excludePaths?: string[];
|
|
748
|
+
/** Include duplicates within the same file. Default: true */
|
|
749
|
+
includeSameFile?: boolean;
|
|
750
|
+
/** Use structural similarity to boost scores. Default: true */
|
|
751
|
+
useStructuralBoost?: boolean;
|
|
752
|
+
/** Filter results by minimum confidence level */
|
|
753
|
+
confidenceFilter?: ConfidenceLevel;
|
|
641
754
|
}
|
|
642
755
|
/**
|
|
643
756
|
* Find groups of semantically similar code.
|
|
@@ -645,9 +758,10 @@ interface FindDuplicatesOptions {
|
|
|
645
758
|
* Algorithm:
|
|
646
759
|
* 1. Iterate through all chunks
|
|
647
760
|
* 2. For each unprocessed chunk, find similar chunks above threshold
|
|
648
|
-
* 3.
|
|
649
|
-
* 4.
|
|
650
|
-
* 5.
|
|
761
|
+
* 3. Calculate combined score (semantic + structural)
|
|
762
|
+
* 4. Group similar chunks together
|
|
763
|
+
* 5. Assign confidence levels
|
|
764
|
+
* 6. Sort groups by size and similarity
|
|
651
765
|
*/
|
|
652
766
|
declare function findDuplicateGroups(vectorStore: VectorStore, metadataStore: MetadataStore, options?: FindDuplicatesOptions): DuplicateGroup[];
|
|
653
767
|
/**
|
|
@@ -684,7 +798,7 @@ interface DuplicateScore {
|
|
|
684
798
|
* Calculate the size ratio between two code chunks.
|
|
685
799
|
* Returns a value between 0 and 1 where 1 means identical size.
|
|
686
800
|
*/
|
|
687
|
-
declare function calculateSizeRatio(chunk1: StoredChunkMetadata, chunk2: StoredChunkMetadata): number;
|
|
801
|
+
declare function calculateSizeRatio$1(chunk1: StoredChunkMetadata, chunk2: StoredChunkMetadata): number;
|
|
688
802
|
/**
|
|
689
803
|
* Calculate a combined duplicate score.
|
|
690
804
|
*/
|
|
@@ -702,6 +816,228 @@ declare function sortDuplicateGroups<T extends {
|
|
|
702
816
|
members: unknown[];
|
|
703
817
|
}>(groups: T[]): T[];
|
|
704
818
|
|
|
819
|
+
/**
|
|
820
|
+
* Structural Similarity Scorer
|
|
821
|
+
*
|
|
822
|
+
* Calculates similarity based on code structure (props, hooks, JSX elements)
|
|
823
|
+
* independent of semantic embeddings. This complements embedding-based
|
|
824
|
+
* similarity by catching cases where code structure is similar but
|
|
825
|
+
* variable/prop names differ.
|
|
826
|
+
*/
|
|
827
|
+
|
|
828
|
+
interface StructuralScore {
|
|
829
|
+
/** Jaccard similarity of prop names (0-1) */
|
|
830
|
+
propsOverlap: number;
|
|
831
|
+
/** Jaccard similarity of JSX elements (0-1) */
|
|
832
|
+
jsxOverlap: number;
|
|
833
|
+
/** Jaccard similarity of hooks used (0-1) */
|
|
834
|
+
hooksOverlap: number;
|
|
835
|
+
/** Line count ratio (0-1, 1 = same size) */
|
|
836
|
+
sizeRatio: number;
|
|
837
|
+
/** Combined weighted score (0-1) */
|
|
838
|
+
combined: number;
|
|
839
|
+
}
|
|
840
|
+
interface StructuralScorerWeights {
|
|
841
|
+
/** Weight for props overlap (default: 0.25) */
|
|
842
|
+
props: number;
|
|
843
|
+
/** Weight for JSX elements overlap (default: 0.35) */
|
|
844
|
+
jsx: number;
|
|
845
|
+
/** Weight for hooks overlap (default: 0.25) */
|
|
846
|
+
hooks: number;
|
|
847
|
+
/** Weight for size similarity (default: 0.15) */
|
|
848
|
+
size: number;
|
|
849
|
+
}
|
|
850
|
+
/**
|
|
851
|
+
* Calculate Jaccard similarity between two sets of strings.
|
|
852
|
+
* Returns 1 if both sets are empty (vacuously similar).
|
|
853
|
+
*/
|
|
854
|
+
declare function jaccard(a: string[], b: string[]): number;
|
|
855
|
+
/**
|
|
856
|
+
* Calculate size ratio between two code chunks.
|
|
857
|
+
* Returns a value between 0 and 1, where 1 means identical size.
|
|
858
|
+
*/
|
|
859
|
+
declare function calculateSizeRatio(linesA: number, linesB: number): number;
|
|
860
|
+
/**
|
|
861
|
+
* Calculate structural similarity between two chunks.
|
|
862
|
+
*
|
|
863
|
+
* This function compares the structural features of two code chunks:
|
|
864
|
+
* - Props/parameters they accept
|
|
865
|
+
* - JSX elements they render
|
|
866
|
+
* - Hooks they use
|
|
867
|
+
* - Relative size
|
|
868
|
+
*
|
|
869
|
+
* @param a First chunk metadata
|
|
870
|
+
* @param b Second chunk metadata
|
|
871
|
+
* @param weights Optional custom weights for each feature
|
|
872
|
+
* @returns Structural similarity score with component breakdowns
|
|
873
|
+
*/
|
|
874
|
+
declare function calculateStructuralSimilarity(a: StoredChunkMetadata, b: StoredChunkMetadata, weights?: StructuralScorerWeights): StructuralScore;
|
|
875
|
+
/**
|
|
876
|
+
* Quick check if two chunks have high structural similarity.
|
|
877
|
+
* Useful for fast pre-filtering before expensive embedding comparison.
|
|
878
|
+
*
|
|
879
|
+
* @param a First chunk metadata
|
|
880
|
+
* @param b Second chunk metadata
|
|
881
|
+
* @param threshold Minimum combined score to consider similar (default: 0.5)
|
|
882
|
+
*/
|
|
883
|
+
declare function hasHighStructuralSimilarity(a: StoredChunkMetadata, b: StoredChunkMetadata, threshold?: number): boolean;
|
|
884
|
+
/**
|
|
885
|
+
* Find structurally similar chunks from a list.
|
|
886
|
+
* Returns chunks sorted by structural similarity (highest first).
|
|
887
|
+
*
|
|
888
|
+
* @param target The chunk to compare against
|
|
889
|
+
* @param candidates List of candidate chunks to compare
|
|
890
|
+
* @param threshold Minimum similarity threshold
|
|
891
|
+
* @param limit Maximum number of results to return
|
|
892
|
+
*/
|
|
893
|
+
declare function findStructurallySimilar(target: StoredChunkMetadata, candidates: StoredChunkMetadata[], threshold?: number, limit?: number): Array<{
|
|
894
|
+
metadata: StoredChunkMetadata;
|
|
895
|
+
score: StructuralScore;
|
|
896
|
+
}>;
|
|
897
|
+
|
|
898
|
+
/**
|
|
899
|
+
* Code Normalizer
|
|
900
|
+
*
|
|
901
|
+
* Normalizes code before comparison to improve duplicate detection:
|
|
902
|
+
* 1. Replaces identifiers with canonical placeholders (for near-identical detection)
|
|
903
|
+
* 2. Normalizes semantic equivalents (size/dimension, onClick/onPress)
|
|
904
|
+
* 3. Strips comments and normalizes whitespace
|
|
905
|
+
*
|
|
906
|
+
* This is particularly useful for detecting utility functions that are
|
|
907
|
+
* copy-pasted with only variable name changes.
|
|
908
|
+
*/
|
|
909
|
+
interface NormalizationOptions {
|
|
910
|
+
/** Replace all local identifiers with placeholders (aggressive) */
|
|
911
|
+
normalizeIdentifiers?: boolean;
|
|
912
|
+
/** Normalize semantic equivalents like size/dimension */
|
|
913
|
+
normalizeSemantics?: boolean;
|
|
914
|
+
/** Strip comments from code */
|
|
915
|
+
stripComments?: boolean;
|
|
916
|
+
/** Normalize whitespace to single spaces */
|
|
917
|
+
normalizeWhitespace?: boolean;
|
|
918
|
+
}
|
|
919
|
+
/**
|
|
920
|
+
* Normalize code for comparison.
|
|
921
|
+
*
|
|
922
|
+
* @param code The source code to normalize
|
|
923
|
+
* @param options Normalization options
|
|
924
|
+
* @returns Normalized code string
|
|
925
|
+
*/
|
|
926
|
+
declare function normalizeCode(code: string, options?: NormalizationOptions): string;
|
|
927
|
+
/**
|
|
928
|
+
* Calculate Levenshtein distance between two strings.
|
|
929
|
+
* Used for fuzzy matching of normalized code.
|
|
930
|
+
*/
|
|
931
|
+
declare function levenshteinDistance(a: string, b: string): number;
|
|
932
|
+
/**
|
|
933
|
+
* Calculate normalized similarity between two code snippets.
|
|
934
|
+
* Returns 1.0 for identical normalized code, lower for differences.
|
|
935
|
+
*
|
|
936
|
+
* @param codeA First code snippet
|
|
937
|
+
* @param codeB Second code snippet
|
|
938
|
+
* @param options Normalization options
|
|
939
|
+
* @returns Similarity score between 0 and 1
|
|
940
|
+
*/
|
|
941
|
+
declare function calculateNormalizedSimilarity(codeA: string, codeB: string, options?: NormalizationOptions): number;
|
|
942
|
+
/**
|
|
943
|
+
* Quick check if two code snippets are near-identical after normalization.
|
|
944
|
+
*
|
|
945
|
+
* @param codeA First code snippet
|
|
946
|
+
* @param codeB Second code snippet
|
|
947
|
+
* @param threshold Minimum similarity to consider near-identical (default: 0.95)
|
|
948
|
+
* @returns True if the code is near-identical
|
|
949
|
+
*/
|
|
950
|
+
declare function isNearIdentical(codeA: string, codeB: string, threshold?: number): boolean;
|
|
951
|
+
/**
|
|
952
|
+
* Prepare code for embedding by applying light normalization.
|
|
953
|
+
* This is less aggressive than full normalization and is meant
|
|
954
|
+
* to improve embedding quality without losing semantic meaning.
|
|
955
|
+
*
|
|
956
|
+
* @param code The source code
|
|
957
|
+
* @returns Lightly normalized code for embedding
|
|
958
|
+
*/
|
|
959
|
+
declare function prepareForEmbedding(code: string): string;
|
|
960
|
+
|
|
961
|
+
/**
|
|
962
|
+
* Combined Scorer
|
|
963
|
+
*
|
|
964
|
+
* Combines multiple similarity signals into a final score:
|
|
965
|
+
* - Semantic similarity (from embeddings)
|
|
966
|
+
* - Structural similarity (from metadata: props, JSX, hooks)
|
|
967
|
+
* - Normalized similarity (from AST normalization - optional, expensive)
|
|
968
|
+
*
|
|
969
|
+
* The combined approach catches more duplicates than embedding alone:
|
|
970
|
+
* - Structural catches same-structure/different-names cases
|
|
971
|
+
* - Normalization catches copy-paste with renamed variables
|
|
972
|
+
* - Semantic catches conceptually similar but differently structured code
|
|
973
|
+
*/
|
|
974
|
+
|
|
975
|
+
interface CombinedScore {
|
|
976
|
+
/** Final combined score (0-1) */
|
|
977
|
+
final: number;
|
|
978
|
+
/** Semantic embedding similarity (0-1) */
|
|
979
|
+
semantic: number;
|
|
980
|
+
/** Structural metadata similarity (0-1) */
|
|
981
|
+
structural: number;
|
|
982
|
+
/** Detailed structural breakdown */
|
|
983
|
+
structuralDetails: StructuralScore;
|
|
984
|
+
/** Normalized code similarity (0-1, only if computed) */
|
|
985
|
+
normalized?: number;
|
|
986
|
+
/** Confidence level based on final score */
|
|
987
|
+
confidence: ConfidenceLevel;
|
|
988
|
+
/** Detailed confidence result */
|
|
989
|
+
confidenceDetails: ConfidenceResult;
|
|
990
|
+
}
|
|
991
|
+
interface CombinedScorerOptions {
|
|
992
|
+
/** Weight for semantic similarity (default: 0.5) */
|
|
993
|
+
semanticWeight?: number;
|
|
994
|
+
/** Weight for structural similarity (default: 0.3) */
|
|
995
|
+
structuralWeight?: number;
|
|
996
|
+
/** Weight for normalized similarity (default: 0.2) */
|
|
997
|
+
normalizedWeight?: number;
|
|
998
|
+
/** Whether to compute normalized similarity (expensive) */
|
|
999
|
+
includeNormalized?: boolean;
|
|
1000
|
+
}
|
|
1001
|
+
declare const DEFAULT_COMBINED_SCORER_OPTIONS: Required<CombinedScorerOptions>;
|
|
1002
|
+
/**
|
|
1003
|
+
* Calculate combined similarity score using multiple signals.
|
|
1004
|
+
*
|
|
1005
|
+
* @param semanticScore Embedding-based similarity score (0-1)
|
|
1006
|
+
* @param metadataA Metadata for first chunk
|
|
1007
|
+
* @param metadataB Metadata for second chunk
|
|
1008
|
+
* @param codeA Source code of first chunk (optional, needed for normalization)
|
|
1009
|
+
* @param codeB Source code of second chunk (optional, needed for normalization)
|
|
1010
|
+
* @param options Scoring options
|
|
1011
|
+
* @returns Combined score with all components
|
|
1012
|
+
*/
|
|
1013
|
+
declare function calculateCombinedScore(semanticScore: number, metadataA: StoredChunkMetadata, metadataB: StoredChunkMetadata, codeA?: string, codeB?: string, options?: CombinedScorerOptions): CombinedScore;
|
|
1014
|
+
/**
|
|
1015
|
+
* Quick pre-filter check using only structural similarity.
|
|
1016
|
+
* Use this to avoid expensive embedding comparisons for obviously dissimilar code.
|
|
1017
|
+
*
|
|
1018
|
+
* @param metadataA First chunk metadata
|
|
1019
|
+
* @param metadataB Second chunk metadata
|
|
1020
|
+
* @param threshold Minimum structural similarity to consider (default: 0.3)
|
|
1021
|
+
* @returns True if chunks are potentially similar enough to warrant full comparison
|
|
1022
|
+
*/
|
|
1023
|
+
declare function isPotentialDuplicate(metadataA: StoredChunkMetadata, metadataB: StoredChunkMetadata, threshold?: number): boolean;
|
|
1024
|
+
/**
|
|
1025
|
+
* Calculate quick similarity using only structural features.
|
|
1026
|
+
* Useful when embedding scores are not available.
|
|
1027
|
+
*/
|
|
1028
|
+
declare function calculateQuickScore(metadataA: StoredChunkMetadata, metadataB: StoredChunkMetadata): {
|
|
1029
|
+
score: number;
|
|
1030
|
+
confidence: ConfidenceLevel;
|
|
1031
|
+
};
|
|
1032
|
+
/**
|
|
1033
|
+
* Determine the best action based on combined score.
|
|
1034
|
+
*/
|
|
1035
|
+
declare function getRecommendedAction(score: CombinedScore): string;
|
|
1036
|
+
/**
|
|
1037
|
+
* Format combined score for display.
|
|
1038
|
+
*/
|
|
1039
|
+
declare function formatCombinedScore(score: CombinedScore, verbose?: boolean): string;
|
|
1040
|
+
|
|
705
1041
|
/**
|
|
706
1042
|
* File Tracker
|
|
707
1043
|
*
|
|
@@ -780,4 +1116,4 @@ declare class FileTracker {
|
|
|
780
1116
|
};
|
|
781
1117
|
}
|
|
782
1118
|
|
|
783
|
-
export { type ChunkKind, type ChunkMetadata, type ChunkingOptions, type CodeChunk, type DuplicateGroup$1 as DuplicateGroup, type DuplicateGroupMember, type DuplicateMember, type DuplicateScore, type EmbeddingOptions, type EmbeddingResult, type FileChange, type FileHashEntry, FileTracker, type FindDuplicatesOptions$1 as FindDuplicatesOptions, type HashStore, IncrementalIndexer, type IndexManifest, type IndexOptions, type IndexUpdateResult, type IndexerOptions, type DuplicateGroup as InternalDuplicateGroup, type FindDuplicatesOptions as InternalFindDuplicatesOptions, MetadataStore, OllamaEmbeddingClient, type SearchOptions, type SearchResult, type SimilarLocationOptions, type SimilarityResult, type StoredChunkMetadata, VectorStore, type VectorStoreOptions, calculateDuplicateScore, calculateGroupAverageSimilarity, calculateSizeRatio, chunkFile, clearIndexerCache, createIndexer, findDuplicateGroups, findDuplicates, findSimilarAtLocation, findSimilarToLocation, findSimilarToQuery, getIndexStats, getOllamaEmbeddingClient, hasIndex, hashContent, hashContentSync, indexDirectory, prepareEmbeddingInput, searchSimilar, sortDuplicateGroups };
|
|
1119
|
+
export { type ChunkKind, type ChunkMetadata, type ChunkingOptions, type CodeChunk, type CombinedScore, type CombinedScorerOptions, type ConfidenceConfig, type ConfidenceLevel, type ConfidenceResult, DEFAULT_COMBINED_SCORER_OPTIONS, DEFAULT_CONFIDENCE_CONFIG, type DuplicateGroup$1 as DuplicateGroup, type DuplicateGroupMember, type DuplicateMember, type DuplicateScore, type EmbeddingOptions, type EmbeddingResult, type FileChange, type FileHashEntry, FileTracker, type FindDuplicatesOptions$1 as FindDuplicatesOptions, type HashStore, IncrementalIndexer, type IndexManifest, type IndexOptions, type IndexUpdateResult, type IndexerOptions, type DuplicateGroup as InternalDuplicateGroup, type FindDuplicatesOptions as InternalFindDuplicatesOptions, MetadataStore, type NormalizationOptions, OllamaEmbeddingClient, type SearchOptions, type SearchResult, type SimilarLocationOptions, type SimilarityResult, type StoredChunkMetadata, type StructuralScore, type StructuralScorerWeights, VectorStore, type VectorStoreOptions, calculateCombinedScore, calculateDuplicateScore, calculateGroupAverageSimilarity, calculateNormalizedSimilarity, calculateQuickScore, calculateSizeRatio$1 as calculateSizeRatio, calculateStructuralSimilarity, calculateSizeRatio as calculateStructuralSizeRatio, chunkFile, clearIndexerCache, compareConfidenceLevels, createIndexer, filterByConfidence, findDuplicateGroups, findDuplicates, findSimilarAtLocation, findSimilarToLocation, findSimilarToQuery, findStructurallySimilar, formatCombinedScore, formatConfidence, formatConfidenceVerbose, getConfidenceAnsiColor, getConfidenceEmoji, getConfidenceLevel, getConfidenceResult, getIndexStats, getOllamaEmbeddingClient, getRecommendedAction, hasHighStructuralSimilarity, hasIndex, hashContent, hashContentSync, indexDirectory, isNearIdentical, isPotentialDuplicate, jaccard, levenshteinDistance, meetsMinimumThreshold, normalizeCode, prepareEmbeddingInput, prepareForEmbedding, searchSimilar, sortDuplicateGroups };
|
package/dist/index.js
CHANGED
|
@@ -1,54 +1,106 @@
|
|
|
1
1
|
import {
|
|
2
|
+
DEFAULT_COMBINED_SCORER_OPTIONS,
|
|
3
|
+
DEFAULT_CONFIDENCE_CONFIG,
|
|
2
4
|
FileTracker,
|
|
3
5
|
IncrementalIndexer,
|
|
4
6
|
MetadataStore,
|
|
5
7
|
OllamaEmbeddingClient,
|
|
6
8
|
VectorStore,
|
|
9
|
+
calculateCombinedScore,
|
|
7
10
|
calculateDuplicateScore,
|
|
8
11
|
calculateGroupAverageSimilarity,
|
|
12
|
+
calculateNormalizedSimilarity,
|
|
13
|
+
calculateQuickScore,
|
|
9
14
|
calculateSizeRatio,
|
|
15
|
+
calculateSizeRatio2,
|
|
16
|
+
calculateStructuralSimilarity,
|
|
10
17
|
chunkFile,
|
|
11
18
|
clearIndexerCache,
|
|
19
|
+
compareConfidenceLevels,
|
|
12
20
|
createIndexer,
|
|
21
|
+
filterByConfidence,
|
|
13
22
|
findDuplicateGroups,
|
|
14
23
|
findDuplicates,
|
|
15
24
|
findSimilarAtLocation,
|
|
16
25
|
findSimilarToLocation,
|
|
17
26
|
findSimilarToQuery,
|
|
27
|
+
findStructurallySimilar,
|
|
28
|
+
formatCombinedScore,
|
|
29
|
+
formatConfidence,
|
|
30
|
+
formatConfidenceVerbose,
|
|
31
|
+
getConfidenceAnsiColor,
|
|
32
|
+
getConfidenceEmoji,
|
|
33
|
+
getConfidenceLevel,
|
|
34
|
+
getConfidenceResult,
|
|
18
35
|
getIndexStats,
|
|
19
36
|
getOllamaEmbeddingClient,
|
|
37
|
+
getRecommendedAction,
|
|
38
|
+
hasHighStructuralSimilarity,
|
|
20
39
|
hasIndex,
|
|
21
40
|
hashContent,
|
|
22
41
|
hashContentSync,
|
|
23
42
|
indexDirectory,
|
|
43
|
+
isNearIdentical,
|
|
44
|
+
isPotentialDuplicate,
|
|
45
|
+
jaccard,
|
|
46
|
+
levenshteinDistance,
|
|
47
|
+
meetsMinimumThreshold,
|
|
48
|
+
normalizeCode,
|
|
24
49
|
prepareEmbeddingInput,
|
|
50
|
+
prepareForEmbedding,
|
|
25
51
|
searchSimilar,
|
|
26
52
|
sortDuplicateGroups
|
|
27
|
-
} from "./chunk-54SLRAFO.js";
|
|
53
|
+
} from "./chunk-BAZQUJDS.js";
|
|
28
54
|
export {
|
|
55
|
+
DEFAULT_COMBINED_SCORER_OPTIONS,
|
|
56
|
+
DEFAULT_CONFIDENCE_CONFIG,
|
|
29
57
|
FileTracker,
|
|
30
58
|
IncrementalIndexer,
|
|
31
59
|
MetadataStore,
|
|
32
60
|
OllamaEmbeddingClient,
|
|
33
61
|
VectorStore,
|
|
62
|
+
calculateCombinedScore,
|
|
34
63
|
calculateDuplicateScore,
|
|
35
64
|
calculateGroupAverageSimilarity,
|
|
65
|
+
calculateNormalizedSimilarity,
|
|
66
|
+
calculateQuickScore,
|
|
36
67
|
calculateSizeRatio,
|
|
68
|
+
calculateStructuralSimilarity,
|
|
69
|
+
calculateSizeRatio2 as calculateStructuralSizeRatio,
|
|
37
70
|
chunkFile,
|
|
38
71
|
clearIndexerCache,
|
|
72
|
+
compareConfidenceLevels,
|
|
39
73
|
createIndexer,
|
|
74
|
+
filterByConfidence,
|
|
40
75
|
findDuplicateGroups,
|
|
41
76
|
findDuplicates,
|
|
42
77
|
findSimilarAtLocation,
|
|
43
78
|
findSimilarToLocation,
|
|
44
79
|
findSimilarToQuery,
|
|
80
|
+
findStructurallySimilar,
|
|
81
|
+
formatCombinedScore,
|
|
82
|
+
formatConfidence,
|
|
83
|
+
formatConfidenceVerbose,
|
|
84
|
+
getConfidenceAnsiColor,
|
|
85
|
+
getConfidenceEmoji,
|
|
86
|
+
getConfidenceLevel,
|
|
87
|
+
getConfidenceResult,
|
|
45
88
|
getIndexStats,
|
|
46
89
|
getOllamaEmbeddingClient,
|
|
90
|
+
getRecommendedAction,
|
|
91
|
+
hasHighStructuralSimilarity,
|
|
47
92
|
hasIndex,
|
|
48
93
|
hashContent,
|
|
49
94
|
hashContentSync,
|
|
50
95
|
indexDirectory,
|
|
96
|
+
isNearIdentical,
|
|
97
|
+
isPotentialDuplicate,
|
|
98
|
+
jaccard,
|
|
99
|
+
levenshteinDistance,
|
|
100
|
+
meetsMinimumThreshold,
|
|
101
|
+
normalizeCode,
|
|
51
102
|
prepareEmbeddingInput,
|
|
103
|
+
prepareForEmbedding,
|
|
52
104
|
searchSimilar,
|
|
53
105
|
sortDuplicateGroups
|
|
54
106
|
};
|
package/dist/node.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export { ChunkKind, ChunkMetadata, ChunkingOptions, CodeChunk, DuplicateGroup, DuplicateGroupMember, DuplicateMember, DuplicateScore, EmbeddingOptions, EmbeddingResult, FileChange, FileHashEntry, FileTracker, FindDuplicatesOptions, HashStore, IncrementalIndexer, IndexManifest, IndexOptions, IndexUpdateResult, IndexerOptions, InternalDuplicateGroup, InternalFindDuplicatesOptions, MetadataStore, OllamaEmbeddingClient, SearchOptions, SearchResult, SimilarLocationOptions, SimilarityResult, StoredChunkMetadata, VectorStore, VectorStoreOptions, calculateDuplicateScore, calculateGroupAverageSimilarity, calculateSizeRatio, chunkFile, clearIndexerCache, createIndexer, findDuplicateGroups, findDuplicates, findSimilarAtLocation, findSimilarToLocation, findSimilarToQuery, getIndexStats, getOllamaEmbeddingClient, hasIndex, hashContent, hashContentSync, indexDirectory, prepareEmbeddingInput, searchSimilar, sortDuplicateGroups } from './index.js';
|
|
1
|
+
export { ChunkKind, ChunkMetadata, ChunkingOptions, CodeChunk, CombinedScore, CombinedScorerOptions, ConfidenceConfig, ConfidenceLevel, ConfidenceResult, DEFAULT_COMBINED_SCORER_OPTIONS, DEFAULT_CONFIDENCE_CONFIG, DuplicateGroup, DuplicateGroupMember, DuplicateMember, DuplicateScore, EmbeddingOptions, EmbeddingResult, FileChange, FileHashEntry, FileTracker, FindDuplicatesOptions, HashStore, IncrementalIndexer, IndexManifest, IndexOptions, IndexUpdateResult, IndexerOptions, InternalDuplicateGroup, InternalFindDuplicatesOptions, MetadataStore, NormalizationOptions, OllamaEmbeddingClient, SearchOptions, SearchResult, SimilarLocationOptions, SimilarityResult, StoredChunkMetadata, StructuralScore, StructuralScorerWeights, VectorStore, VectorStoreOptions, calculateCombinedScore, calculateDuplicateScore, calculateGroupAverageSimilarity, calculateNormalizedSimilarity, calculateQuickScore, calculateSizeRatio, calculateStructuralSimilarity, calculateStructuralSizeRatio, chunkFile, clearIndexerCache, compareConfidenceLevels, createIndexer, filterByConfidence, findDuplicateGroups, findDuplicates, findSimilarAtLocation, findSimilarToLocation, findSimilarToQuery, findStructurallySimilar, formatCombinedScore, formatConfidence, formatConfidenceVerbose, getConfidenceAnsiColor, getConfidenceEmoji, getConfidenceLevel, getConfidenceResult, getIndexStats, getOllamaEmbeddingClient, getRecommendedAction, hasHighStructuralSimilarity, hasIndex, hashContent, hashContentSync, indexDirectory, isNearIdentical, isPotentialDuplicate, jaccard, levenshteinDistance, meetsMinimumThreshold, normalizeCode, prepareEmbeddingInput, prepareForEmbedding, searchSimilar, sortDuplicateGroups } from './index.js';
|
package/dist/node.js
CHANGED
|
@@ -1,54 +1,106 @@
|
|
|
1
1
|
import {
|
|
2
|
+
DEFAULT_COMBINED_SCORER_OPTIONS,
|
|
3
|
+
DEFAULT_CONFIDENCE_CONFIG,
|
|
2
4
|
FileTracker,
|
|
3
5
|
IncrementalIndexer,
|
|
4
6
|
MetadataStore,
|
|
5
7
|
OllamaEmbeddingClient,
|
|
6
8
|
VectorStore,
|
|
9
|
+
calculateCombinedScore,
|
|
7
10
|
calculateDuplicateScore,
|
|
8
11
|
calculateGroupAverageSimilarity,
|
|
12
|
+
calculateNormalizedSimilarity,
|
|
13
|
+
calculateQuickScore,
|
|
9
14
|
calculateSizeRatio,
|
|
15
|
+
calculateSizeRatio2,
|
|
16
|
+
calculateStructuralSimilarity,
|
|
10
17
|
chunkFile,
|
|
11
18
|
clearIndexerCache,
|
|
19
|
+
compareConfidenceLevels,
|
|
12
20
|
createIndexer,
|
|
21
|
+
filterByConfidence,
|
|
13
22
|
findDuplicateGroups,
|
|
14
23
|
findDuplicates,
|
|
15
24
|
findSimilarAtLocation,
|
|
16
25
|
findSimilarToLocation,
|
|
17
26
|
findSimilarToQuery,
|
|
27
|
+
findStructurallySimilar,
|
|
28
|
+
formatCombinedScore,
|
|
29
|
+
formatConfidence,
|
|
30
|
+
formatConfidenceVerbose,
|
|
31
|
+
getConfidenceAnsiColor,
|
|
32
|
+
getConfidenceEmoji,
|
|
33
|
+
getConfidenceLevel,
|
|
34
|
+
getConfidenceResult,
|
|
18
35
|
getIndexStats,
|
|
19
36
|
getOllamaEmbeddingClient,
|
|
37
|
+
getRecommendedAction,
|
|
38
|
+
hasHighStructuralSimilarity,
|
|
20
39
|
hasIndex,
|
|
21
40
|
hashContent,
|
|
22
41
|
hashContentSync,
|
|
23
42
|
indexDirectory,
|
|
43
|
+
isNearIdentical,
|
|
44
|
+
isPotentialDuplicate,
|
|
45
|
+
jaccard,
|
|
46
|
+
levenshteinDistance,
|
|
47
|
+
meetsMinimumThreshold,
|
|
48
|
+
normalizeCode,
|
|
24
49
|
prepareEmbeddingInput,
|
|
50
|
+
prepareForEmbedding,
|
|
25
51
|
searchSimilar,
|
|
26
52
|
sortDuplicateGroups
|
|
27
|
-
} from "./chunk-54SLRAFO.js";
|
|
53
|
+
} from "./chunk-BAZQUJDS.js";
|
|
28
54
|
export {
|
|
55
|
+
DEFAULT_COMBINED_SCORER_OPTIONS,
|
|
56
|
+
DEFAULT_CONFIDENCE_CONFIG,
|
|
29
57
|
FileTracker,
|
|
30
58
|
IncrementalIndexer,
|
|
31
59
|
MetadataStore,
|
|
32
60
|
OllamaEmbeddingClient,
|
|
33
61
|
VectorStore,
|
|
62
|
+
calculateCombinedScore,
|
|
34
63
|
calculateDuplicateScore,
|
|
35
64
|
calculateGroupAverageSimilarity,
|
|
65
|
+
calculateNormalizedSimilarity,
|
|
66
|
+
calculateQuickScore,
|
|
36
67
|
calculateSizeRatio,
|
|
68
|
+
calculateStructuralSimilarity,
|
|
69
|
+
calculateSizeRatio2 as calculateStructuralSizeRatio,
|
|
37
70
|
chunkFile,
|
|
38
71
|
clearIndexerCache,
|
|
72
|
+
compareConfidenceLevels,
|
|
39
73
|
createIndexer,
|
|
74
|
+
filterByConfidence,
|
|
40
75
|
findDuplicateGroups,
|
|
41
76
|
findDuplicates,
|
|
42
77
|
findSimilarAtLocation,
|
|
43
78
|
findSimilarToLocation,
|
|
44
79
|
findSimilarToQuery,
|
|
80
|
+
findStructurallySimilar,
|
|
81
|
+
formatCombinedScore,
|
|
82
|
+
formatConfidence,
|
|
83
|
+
formatConfidenceVerbose,
|
|
84
|
+
getConfidenceAnsiColor,
|
|
85
|
+
getConfidenceEmoji,
|
|
86
|
+
getConfidenceLevel,
|
|
87
|
+
getConfidenceResult,
|
|
45
88
|
getIndexStats,
|
|
46
89
|
getOllamaEmbeddingClient,
|
|
90
|
+
getRecommendedAction,
|
|
91
|
+
hasHighStructuralSimilarity,
|
|
47
92
|
hasIndex,
|
|
48
93
|
hashContent,
|
|
49
94
|
hashContentSync,
|
|
50
95
|
indexDirectory,
|
|
96
|
+
isNearIdentical,
|
|
97
|
+
isPotentialDuplicate,
|
|
98
|
+
jaccard,
|
|
99
|
+
levenshteinDistance,
|
|
100
|
+
meetsMinimumThreshold,
|
|
101
|
+
normalizeCode,
|
|
51
102
|
prepareEmbeddingInput,
|
|
103
|
+
prepareForEmbedding,
|
|
52
104
|
searchSimilar,
|
|
53
105
|
sortDuplicateGroups
|
|
54
106
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "uilint-duplicates",
|
|
3
|
-
"version": "0.2.122",
|
|
3
|
+
"version": "0.2.124",
|
|
4
4
|
"description": "Semantic code duplicate detection for React/TypeScript codebases",
|
|
5
5
|
"author": "Peter Suggate",
|
|
6
6
|
"repository": {
|
|
@@ -55,7 +55,7 @@
|
|
|
55
55
|
"vitest": "^4.0.16"
|
|
56
56
|
},
|
|
57
57
|
"peerDependencies": {
|
|
58
|
-
"uilint-core": "0.2.122"
|
|
58
|
+
"uilint-core": "0.2.124"
|
|
59
59
|
},
|
|
60
60
|
"keywords": [
|
|
61
61
|
"duplicate-detection",
|