@mhalder/qdrant-mcp-server 3.3.1 → 3.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/claude-code-review.yml +10 -24
- package/.github/workflows/claude.yml +1 -1
- package/CHANGELOG.md +14 -0
- package/build/embeddings/sparse.d.ts +10 -2
- package/build/embeddings/sparse.d.ts.map +1 -1
- package/build/embeddings/sparse.js +34 -16
- package/build/embeddings/sparse.js.map +1 -1
- package/build/embeddings/sparse.test.js +52 -0
- package/build/embeddings/sparse.test.js.map +1 -1
- package/build/git/extractor.integration.test.js +8 -6
- package/build/git/extractor.integration.test.js.map +1 -1
- package/package.json +1 -1
- package/src/embeddings/sparse.test.ts +64 -0
- package/src/embeddings/sparse.ts +37 -17
- package/src/git/extractor.integration.test.ts +12 -6
|
@@ -2,7 +2,7 @@ name: Claude Code Review
|
|
|
2
2
|
|
|
3
3
|
on:
|
|
4
4
|
pull_request:
|
|
5
|
-
types: [opened, synchronize]
|
|
5
|
+
types: [opened, synchronize, ready_for_review, reopened]
|
|
6
6
|
# Optional: Only run on specific file changes
|
|
7
7
|
# paths:
|
|
8
8
|
# - "src/**/*.ts"
|
|
@@ -10,12 +10,6 @@ on:
|
|
|
10
10
|
# - "src/**/*.js"
|
|
11
11
|
# - "src/**/*.jsx"
|
|
12
12
|
|
|
13
|
-
permissions:
|
|
14
|
-
contents: read
|
|
15
|
-
pull-requests: write
|
|
16
|
-
issues: write
|
|
17
|
-
id-token: write
|
|
18
|
-
|
|
19
13
|
jobs:
|
|
20
14
|
claude-review:
|
|
21
15
|
# Optional: Filter by PR author
|
|
@@ -25,6 +19,11 @@ jobs:
|
|
|
25
19
|
# github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
|
|
26
20
|
|
|
27
21
|
runs-on: ubuntu-latest
|
|
22
|
+
permissions:
|
|
23
|
+
contents: read
|
|
24
|
+
pull-requests: read
|
|
25
|
+
issues: read
|
|
26
|
+
id-token: write
|
|
28
27
|
|
|
29
28
|
steps:
|
|
30
29
|
- name: Checkout repository
|
|
@@ -37,22 +36,9 @@ jobs:
|
|
|
37
36
|
uses: anthropics/claude-code-action@v1
|
|
38
37
|
with:
|
|
39
38
|
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
Please review this pull request and provide feedback on:
|
|
45
|
-
- Code quality and best practices
|
|
46
|
-
- Potential bugs or issues
|
|
47
|
-
- Performance considerations
|
|
48
|
-
- Security concerns
|
|
49
|
-
- Test coverage
|
|
50
|
-
|
|
51
|
-
Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
|
|
52
|
-
|
|
53
|
-
Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
|
|
54
|
-
|
|
39
|
+
plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
|
|
40
|
+
plugins: 'code-review@claude-code-plugins'
|
|
41
|
+
prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
|
|
55
42
|
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
|
56
|
-
# or https://
|
|
57
|
-
claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"'
|
|
43
|
+
# or https://code.claude.com/docs/en/cli-reference for available options
|
|
58
44
|
|
|
@@ -45,6 +45,6 @@ jobs:
|
|
|
45
45
|
|
|
46
46
|
# Optional: Add claude_args to customize behavior and configuration
|
|
47
47
|
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
|
48
|
-
# or https://
|
|
48
|
+
# or https://code.claude.com/docs/en/cli-reference for available options
|
|
49
49
|
# claude_args: '--allowed-tools Bash(gh pr:*)'
|
|
50
50
|
|
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
## [3.3.3](https://github.com/mhalder/qdrant-mcp-server/compare/v3.3.2...v3.3.3) (2026-03-26)
|
|
2
|
+
|
|
3
|
+
### Bug Fixes
|
|
4
|
+
|
|
5
|
+
* **deps:** resolve npm audit vulnerabilities ([01d17e6](https://github.com/mhalder/qdrant-mcp-server/commit/01d17e6673476660f0afa75b99c9528aebcf3fec))
|
|
6
|
+
|
|
7
|
+
## [3.3.2](https://github.com/mhalder/qdrant-mcp-server/compare/v3.3.1...v3.3.2) (2026-03-25)
|
|
8
|
+
|
|
9
|
+
### Bug Fixes
|
|
10
|
+
|
|
11
|
+
* **sparse:** increase BM25 vocabulary size from 30k to 1M ([bc8fa2a](https://github.com/mhalder/qdrant-mcp-server/commit/bc8fa2a778461aad278339d069bad479aba46a6e))
|
|
12
|
+
* **sparse:** use deterministic hash-based vocabulary indices for BM25 ([cb6fa0d](https://github.com/mhalder/qdrant-mcp-server/commit/cb6fa0d256f0e0bfd628bdfbe4e7e7340f5b56eb))
|
|
13
|
+
* **test:** remove hardcoded commit counts in git extractor integration tests ([cae864f](https://github.com/mhalder/qdrant-mcp-server/commit/cae864f27ad642960966cb0f9ffbad0161ef25ae))
|
|
14
|
+
|
|
1
15
|
## [3.3.1](https://github.com/mhalder/qdrant-mcp-server/compare/v3.3.0...v3.3.1) (2026-02-09)
|
|
2
16
|
|
|
3
17
|
### Bug Fixes
|
|
@@ -2,17 +2,25 @@
|
|
|
2
2
|
* BM25 Sparse Vector Generator
|
|
3
3
|
*
|
|
4
4
|
* This module provides a simple BM25-like sparse vector generation for keyword search.
|
|
5
|
+
* Uses deterministic hash-based vocabulary indices so that the same token always maps
|
|
6
|
+
* to the same index, regardless of when or where the generator is instantiated.
|
|
7
|
+
*
|
|
5
8
|
* For production use, consider using a proper BM25 implementation or Qdrant's built-in
|
|
6
9
|
* sparse vector generation via FastEmbed.
|
|
7
10
|
*/
|
|
8
11
|
import type { SparseVector } from "../qdrant/client.js";
|
|
9
12
|
export declare class BM25SparseVectorGenerator {
|
|
10
|
-
private vocabulary;
|
|
11
13
|
private idfScores;
|
|
12
14
|
private documentCount;
|
|
13
15
|
private k1;
|
|
14
16
|
private b;
|
|
15
17
|
constructor(k1?: number, b?: number);
|
|
18
|
+
/**
|
|
19
|
+
* Deterministically hash a token to a fixed vocabulary index.
|
|
20
|
+
* The same token will always produce the same index, regardless of
|
|
21
|
+
* generator instance or document processing order.
|
|
22
|
+
*/
|
|
23
|
+
private hashToken;
|
|
16
24
|
/**
|
|
17
25
|
* Tokenize text into words (simple whitespace tokenization + lowercase)
|
|
18
26
|
*/
|
|
@@ -23,7 +31,7 @@ export declare class BM25SparseVectorGenerator {
|
|
|
23
31
|
private getTermFrequency;
|
|
24
32
|
/**
|
|
25
33
|
* Build vocabulary from training documents (optional pre-training step)
|
|
26
|
-
*
|
|
34
|
+
* Computes IDF scores for more accurate BM25 scoring.
|
|
27
35
|
*/
|
|
28
36
|
train(documents: string[]): void;
|
|
29
37
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sparse.d.ts","sourceRoot":"","sources":["../../src/embeddings/sparse.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"sparse.d.ts","sourceRoot":"","sources":["../../src/embeddings/sparse.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAcxD,qBAAa,yBAAyB;IACpC,OAAO,CAAC,SAAS,CAAsB;IACvC,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,EAAE,CAAS;IACnB,OAAO,CAAC,CAAC,CAAS;gBAEN,EAAE,GAAE,MAAY,EAAE,CAAC,GAAE,MAAa;IAO9C;;;;OAIG;IACH,OAAO,CAAC,SAAS;IAQjB;;OAEG;IACH,OAAO,CAAC,QAAQ;IAQhB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAQxB;;;OAGG;IACH,KAAK,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,IAAI;IAqBhC;;;OAGG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,YAAY,GAAE,MAAW,GAAG,YAAY;IAoC/D;;;OAGG;IACH,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY;CAIlD"}
|
|
@@ -2,22 +2,42 @@
|
|
|
2
2
|
* BM25 Sparse Vector Generator
|
|
3
3
|
*
|
|
4
4
|
* This module provides a simple BM25-like sparse vector generation for keyword search.
|
|
5
|
+
* Uses deterministic hash-based vocabulary indices so that the same token always maps
|
|
6
|
+
* to the same index, regardless of when or where the generator is instantiated.
|
|
7
|
+
*
|
|
5
8
|
* For production use, consider using a proper BM25 implementation or Qdrant's built-in
|
|
6
9
|
* sparse vector generation via FastEmbed.
|
|
7
10
|
*/
|
|
11
|
+
/**
|
|
12
|
+
* Size of the hash-based vocabulary space.
|
|
13
|
+
* Tokens are mapped to indices in [0, VOCAB_SIZE) via deterministic hashing.
|
|
14
|
+
* 1M provides virtually zero hash collisions while adding no overhead
|
|
15
|
+
* since sparse vectors only store non-zero (index, value) pairs.
|
|
16
|
+
*/
|
|
17
|
+
const VOCAB_SIZE = 1_000_000;
|
|
8
18
|
export class BM25SparseVectorGenerator {
|
|
9
|
-
vocabulary;
|
|
10
19
|
idfScores;
|
|
11
20
|
documentCount;
|
|
12
21
|
k1;
|
|
13
22
|
b;
|
|
14
23
|
constructor(k1 = 1.2, b = 0.75) {
|
|
15
|
-
this.vocabulary = new Map();
|
|
16
24
|
this.idfScores = new Map();
|
|
17
25
|
this.documentCount = 0;
|
|
18
26
|
this.k1 = k1;
|
|
19
27
|
this.b = b;
|
|
20
28
|
}
|
|
29
|
+
/**
|
|
30
|
+
* Deterministically hash a token to a fixed vocabulary index.
|
|
31
|
+
* The same token will always produce the same index, regardless of
|
|
32
|
+
* generator instance or document processing order.
|
|
33
|
+
*/
|
|
34
|
+
hashToken(token) {
|
|
35
|
+
let hash = 0;
|
|
36
|
+
for (let i = 0; i < token.length; i++) {
|
|
37
|
+
hash = ((hash << 5) - hash + token.charCodeAt(i)) | 0;
|
|
38
|
+
}
|
|
39
|
+
return Math.abs(hash) % VOCAB_SIZE;
|
|
40
|
+
}
|
|
21
41
|
/**
|
|
22
42
|
* Tokenize text into words (simple whitespace tokenization + lowercase)
|
|
23
43
|
*/
|
|
@@ -40,7 +60,7 @@ export class BM25SparseVectorGenerator {
|
|
|
40
60
|
}
|
|
41
61
|
/**
|
|
42
62
|
* Build vocabulary from training documents (optional pre-training step)
|
|
43
|
-
*
|
|
63
|
+
* Computes IDF scores for more accurate BM25 scoring.
|
|
44
64
|
*/
|
|
45
65
|
train(documents) {
|
|
46
66
|
this.documentCount = documents.length;
|
|
@@ -50,9 +70,6 @@ export class BM25SparseVectorGenerator {
|
|
|
50
70
|
const tokens = this.tokenize(doc);
|
|
51
71
|
const uniqueTokens = new Set(tokens);
|
|
52
72
|
for (const token of uniqueTokens) {
|
|
53
|
-
if (!this.vocabulary.has(token)) {
|
|
54
|
-
this.vocabulary.set(token, this.vocabulary.size);
|
|
55
|
-
}
|
|
56
73
|
documentFrequency.set(token, (documentFrequency.get(token) || 0) + 1);
|
|
57
74
|
}
|
|
58
75
|
}
|
|
@@ -70,16 +87,11 @@ export class BM25SparseVectorGenerator {
|
|
|
70
87
|
const tokens = this.tokenize(text);
|
|
71
88
|
const tf = this.getTermFrequency(tokens);
|
|
72
89
|
const docLength = tokens.length;
|
|
73
|
-
|
|
74
|
-
const
|
|
90
|
+
// Use a map to accumulate scores per index, handling potential hash collisions
|
|
91
|
+
const indexScores = new Map();
|
|
75
92
|
// Calculate BM25 score for each term
|
|
76
93
|
for (const [token, freq] of Object.entries(tf)) {
|
|
77
|
-
|
|
78
|
-
if (!this.vocabulary.has(token)) {
|
|
79
|
-
// For unseen tokens, add them to vocabulary dynamically
|
|
80
|
-
this.vocabulary.set(token, this.vocabulary.size);
|
|
81
|
-
}
|
|
82
|
-
const index = this.vocabulary.get(token);
|
|
94
|
+
const index = this.hashToken(token);
|
|
83
95
|
// Use a default IDF if not trained
|
|
84
96
|
const idf = this.idfScores.get(token) || 1.0;
|
|
85
97
|
// BM25 formula
|
|
@@ -87,10 +99,16 @@ export class BM25SparseVectorGenerator {
|
|
|
87
99
|
const denominator = freq + this.k1 * (1 - this.b + this.b * (docLength / avgDocLength));
|
|
88
100
|
const score = idf * (numerator / denominator);
|
|
89
101
|
if (score > 0) {
|
|
90
|
-
indices
|
|
91
|
-
|
|
102
|
+
// Sum scores for colliding hash indices
|
|
103
|
+
indexScores.set(index, (indexScores.get(index) || 0) + score);
|
|
92
104
|
}
|
|
93
105
|
}
|
|
106
|
+
const indices = [];
|
|
107
|
+
const values = [];
|
|
108
|
+
for (const [index, score] of indexScores.entries()) {
|
|
109
|
+
indices.push(index);
|
|
110
|
+
values.push(score);
|
|
111
|
+
}
|
|
94
112
|
return { indices, values };
|
|
95
113
|
}
|
|
96
114
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sparse.js","sourceRoot":"","sources":["../../src/embeddings/sparse.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"sparse.js","sourceRoot":"","sources":["../../src/embeddings/sparse.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAQH;;;;;GAKG;AACH,MAAM,UAAU,GAAG,SAAS,CAAC;AAE7B,MAAM,OAAO,yBAAyB;IAC5B,SAAS,CAAsB;IAC/B,aAAa,CAAS;IACtB,EAAE,CAAS;IACX,CAAC,CAAS;IAElB,YAAY,KAAa,GAAG,EAAE,IAAY,IAAI;QAC5C,IAAI,CAAC,SAAS,GAAG,IAAI,GAAG,EAAE,CAAC;QAC3B,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC;QACvB,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACb,CAAC;IAED;;;;OAIG;IACK,SAAS,CAAC,KAAa;QAC7B,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC;IACrC,CAAC;IAED;;OAEG;IACK,QAAQ,CAAC,IAAY;QAC3B,OAAO,IAAI;aACR,WAAW,EAAE;aACb,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;aACxB,KAAK,CAAC,KAAK,CAAC;aACZ,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,MAAgB;QACvC,MAAM,EAAE,GAAmB,EAAE,CAAC;QAC9B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACnC,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,SAAmB;QACvB,IAAI,CAAC,aAAa,GAAG,SAAS,CAAC,MAAM,CAAC;QACtC,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC;QAEpD,6CAA6C;QAC7C,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAClC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;YAErC,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;gBACjC,iBAAiB,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACxE,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,KAAK,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,iBAAiB,CAAC,OAAO,EAAE,EAAE,CAAC;YACtD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,aAAa,GAAG,EAAE,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC;YACzE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,QAAQ,CAAC,IAAY,EAAE,eAAuB,EAAE;QAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,EAAE,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC;QAEhC,+EAA+E;QAC/E,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;QAE9C,qCAAqC;QACrC,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC;YAC/C,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;YAEpC,mCAAmC;YACnC,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC;YAE7C,eAAe;YACf,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;YACvC,MAAM,WAAW,GAAG,IAAI,GAAG,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,SAAS,GAAG,YAAY,CAAC,CAAC,CAAC;YACxF,MAAM,KAAK,GAAG,GAAG,GAAG,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;YAE9C,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,wCAAwC;gBACxC,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,WAAW,CAAC,OAAO,EAAE,EAAE,CAAC;YACnD,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;QAED,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;IAC7B,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,cAAc,CAAC,IAAY;QAChC,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,OAAO,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;CACF"}
|
|
@@ -65,5 +65,57 @@ describe("BM25SparseVectorGenerator", () => {
|
|
|
65
65
|
expect(value).toBeGreaterThan(0);
|
|
66
66
|
});
|
|
67
67
|
});
|
|
68
|
+
it("should produce deterministic indices across separate generator instances", () => {
|
|
69
|
+
// This is the core bug fix validation: two independent generators must
|
|
70
|
+
// assign the same index to the same token, so index-time and query-time
|
|
71
|
+
// sparse vectors are compatible.
|
|
72
|
+
const generator1 = new BM25SparseVectorGenerator();
|
|
73
|
+
const generator2 = new BM25SparseVectorGenerator();
|
|
74
|
+
const result1 = generator1.generate("hello world");
|
|
75
|
+
const result2 = generator2.generate("hello world");
|
|
76
|
+
expect(result1.indices).toEqual(result2.indices);
|
|
77
|
+
expect(result1.values).toEqual(result2.values);
|
|
78
|
+
});
|
|
79
|
+
it("should produce matching indices when query tokens are a subset of indexed tokens", () => {
|
|
80
|
+
// Simulates the real hybrid_search flow:
|
|
81
|
+
// 1. Index generator processes multiple documents (builds vocabulary)
|
|
82
|
+
// 2. Query generator is a fresh instance processing the query
|
|
83
|
+
// The query token indices must match the document token indices.
|
|
84
|
+
const indexGenerator = new BM25SparseVectorGenerator();
|
|
85
|
+
indexGenerator.generate("the quick brown fox jumps over the lazy dog");
|
|
86
|
+
indexGenerator.generate("machine learning is a subset of artificial intelligence");
|
|
87
|
+
indexGenerator.generate("sparse vectors enable keyword search in qdrant");
|
|
88
|
+
const queryGenerator = new BM25SparseVectorGenerator();
|
|
89
|
+
const queryResult = queryGenerator.generate("quick fox");
|
|
90
|
+
// Generate the same query with the index generator for comparison
|
|
91
|
+
const indexQueryResult = indexGenerator.generate("quick fox");
|
|
92
|
+
// Indices must be identical -- same tokens, same indices
|
|
93
|
+
const querySorted = [...queryResult.indices].sort((a, b) => a - b);
|
|
94
|
+
const indexSorted = [...indexQueryResult.indices].sort((a, b) => a - b);
|
|
95
|
+
expect(querySorted).toEqual(indexSorted);
|
|
96
|
+
});
|
|
97
|
+
it("should map different tokens to different indices (within hash collision tolerance)", () => {
|
|
98
|
+
const generator = new BM25SparseVectorGenerator();
|
|
99
|
+
// Use a set of clearly distinct tokens
|
|
100
|
+
const tokens = ["apple", "banana", "cherry", "dragon", "elephant",
|
|
101
|
+
"flamingo", "giraffe", "helicopter", "igloo", "jungle"];
|
|
102
|
+
const indices = new Set();
|
|
103
|
+
for (const token of tokens) {
|
|
104
|
+
const result = generator.generate(token);
|
|
105
|
+
// Each single-token text produces exactly one index
|
|
106
|
+
expect(result.indices).toHaveLength(1);
|
|
107
|
+
indices.add(result.indices[0]);
|
|
108
|
+
}
|
|
109
|
+
// With 10 tokens and a 1M-size vocabulary space, collisions should be essentially zero.
|
|
110
|
+
expect(indices.size).toBe(tokens.length);
|
|
111
|
+
});
|
|
112
|
+
it("should generate indices within valid vocabulary range", () => {
|
|
113
|
+
const generator = new BM25SparseVectorGenerator();
|
|
114
|
+
const result = generator.generate("testing various words for index range validation");
|
|
115
|
+
for (const index of result.indices) {
|
|
116
|
+
expect(index).toBeGreaterThanOrEqual(0);
|
|
117
|
+
expect(index).toBeLessThan(1_000_000);
|
|
118
|
+
}
|
|
119
|
+
});
|
|
68
120
|
});
|
|
69
121
|
//# sourceMappingURL=sparse.test.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sparse.test.js","sourceRoot":"","sources":["../../src/embeddings/sparse.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,yBAAyB,EAAE,MAAM,aAAa,CAAC;AAExD,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAEjD,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,GAAG,EAAE;QAC/D,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;QAEpD,+DAA+D;QAC/D,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAElD,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACjD,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAEtC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;QAC1D,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,4BAA4B,CAAC,CAAC;QAEhE,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,CAAC,qBAAqB,EAAE,yBAAyB,EAAE,kBAAkB,CAAC,CAAC;QAEtF,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACxB,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;QAE/C,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,MAAM,GAAG,yBAAyB,CAAC,cAAc,CAAC,aAAa,CAAC,CAAC;QAEvE,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAElD,iDAAiD;QACjD,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAEjD,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;YAC9B,MAAM,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"sparse.test.js","sourceRoot":"","sources":["../../src/embeddings/sparse.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,yBAAyB,EAAE,MAAM,aAAa,CAAC;AAExD,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAEjD,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,GAAG,EAAE;QAC/D,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;QAEpD,+DAA+D;QAC/D,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAElD,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACjD,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAEtC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;QAC1D,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,4BAA4B,CAAC,CAAC;QAEhE,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,CAAC,qBAAqB,EAAE,yBAAyB,EAAE,kBAAkB,CAAC,CAAC;QAEtF,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACxB,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;QAE/C,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,MAAM,GAAG,yBAAyB,CAAC,cAAc,CAAC,aAAa,CAAC,CAAC;QAEvE,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAElD,iDAAiD;QACjD,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAEjD,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;YAC9B,MAAM,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0EAA0E,EAAE,GAAG,EAAE;QAClF,uEAAuE;QACvE,wEAAwE;QACxE,iCAAiC;QACjC,MAAM,UAAU,GAAG,IAAI,yBAAyB,EAAE,CAAC;QACnD,MAAM,UAAU,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAEnD,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QACnD,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAEnD,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACjD,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kFAAkF,EAAE,GAAG,EAAE;QAC1F,yCAAyC;QACzC,sEAAsE;QACtE,8DAA8D;QAC9D,iEAAiE;QACjE,MAAM,cAAc,GAAG,IAAI,yBAAyB,EAAE,CAAC;QACvD,cAAc,CAAC,QAAQ,CAAC,6CAA6C,CAAC,CAAC;QACvE,cAAc,CAAC,QAAQ,CAAC,yDAAyD,CAAC,CAAC;QACnF,cAAc,CAAC,QAAQ,CAAC,gDAAgD,CAAC,CAAC;QAE1E,MAAM,cAAc,GAAG,IAAI,yBAAyB,EAAE,CAAC;QACvD,MAAM,WAAW,GAAG,cAAc,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;QAEzD,kEAAkE;QAClE,MAAM,gBAAgB,GAAG,cAAc,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;QAE9D,yDAAyD;QACzD,MAAM,WAAW,GAAG,CAAC,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACnE,MAAM,WAAW,GAAG,CAAC,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oFAAoF,EAAE,GAAG,EAAE;QAC5F,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,uCAAuC;QACvC,MAAM,MAAM,GAAG,CAAC,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU;YAC/D,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;QAE1D,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;QAClC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACzC,oDAAoD;YACpD,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACvC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QACjC,CAAC;QAED,wFAAwF;QACxF,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,GAAG,EAAE;QAC/D,MAAM,SAAS,GAAG,IAAI,yBAAyB,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,kDAAkD,CAAC,CAAC;QAEtF,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnC,MAAM,CAAC,KAAK,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;YACxC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QACxC,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -109,12 +109,12 @@ describe("GitExtractor Integration Tests", () => {
|
|
|
109
109
|
sinceCommit: sinceHash,
|
|
110
110
|
maxCommits: 20,
|
|
111
111
|
});
|
|
112
|
-
// Should have fewer commits
|
|
112
|
+
// Should have fewer commits than total
|
|
113
113
|
expect(recentCommits.length).toBeLessThan(allCommits.length);
|
|
114
|
-
expect(recentCommits.length).
|
|
115
|
-
// Verify
|
|
116
|
-
for (
|
|
117
|
-
expect(
|
|
114
|
+
expect(recentCommits.length).toBeGreaterThan(0);
|
|
115
|
+
// Verify all returned commits are newer than the sinceHash
|
|
116
|
+
for (const commit of recentCommits) {
|
|
117
|
+
expect(commit.hash).not.toBe(sinceHash);
|
|
118
118
|
}
|
|
119
119
|
}
|
|
120
120
|
});
|
|
@@ -158,7 +158,9 @@ describe("GitExtractor Integration Tests", () => {
|
|
|
158
158
|
if (commits.length >= 5) {
|
|
159
159
|
const sinceHash = commits[4].hash;
|
|
160
160
|
const count = await extractor.getCommitCount(sinceHash);
|
|
161
|
-
|
|
161
|
+
// Verify against git rev-list directly
|
|
162
|
+
const { stdout } = await execFileAsync("git", ["rev-list", "--count", `${sinceHash}..HEAD`], { cwd: repoPath });
|
|
163
|
+
expect(count).toBe(parseInt(stdout.trim(), 10));
|
|
162
164
|
}
|
|
163
165
|
});
|
|
164
166
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.integration.test.js","sourceRoot":"","sources":["../../src/git/extractor.integration.test.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEjD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAE1C,QAAQ,CAAC,gCAAgC,EAAE,GAAG,EAAE;IAC9C,IAAI,SAAuB,CAAC;IAC5B,MAAM,MAAM,GAAc,EAAE,GAAG,kBAAkB,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;IAErE,mDAAmD;IACnD,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAE/B,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,SAAS,GAAG,IAAI,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAE/C,mCAAmC;QACnC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,kBAAkB,EAAE,CAAC;QACpD,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,qDAAqD,CAAC,CAAC;QACzE,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,kBAAkB,EAAE,CAAC;YACpD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC;YACnE,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,kBAAkB,EAAE,CAAC;YACvD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;QAC3C,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;YAE/D,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;YAC1C,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC;YAE/C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,yCAAyC;gBACzC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;gBAE9C,+CAA+C;gBAC/C,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;gBAEnD,6BAA6B;gBAC7B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBAEhD,6BAA6B;gBAC7B,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;gBAE5C,uBAAuB;gBACvB,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;gBACzC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC;gBAE5C,8BAA8B;gBAC9B,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBAEjD,yDAAyD;gBACzD,uEAAuE;gBACvE,MAAM,cAAc,GAAG,kBAAkB,CAAC;gBAC1C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;gBAChD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;gBAClD,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;gBAEnD,wDAAwD;gBACxD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;gBACpD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;gBACnD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACvD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAEtD,qBAAqB;gBACrB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC/C,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAChC,MAAM,CAAC,OAAO,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBACnC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBACzC,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;YACxE,uCAAuC;YACvC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CACpC,KAAK,EACL,CAAC,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,EAC3C,EAAE,GAAG,EAAE,QAAQ,EAAE,CAClB,CAAC;YACF,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YAEhE,4BAA4B;YAC5B,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;YAE/D,uBAAuB;YACvB,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,yCAAyC;YACzC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,aAAa,CAC/C,KAAK,EACL,CAAC,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,EACvD,EAAE,GAAG,EAAE,QAAQ,EAAE,CAClB,CAAC;YAEF,0EAA0E;YAC1E,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC/B,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;gBAC/D,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEnE,0CAA0C;gBAC1C,MAAM,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBAEnD,wCAAwC;gBACxC,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;oBACtC,wEAAwE;oBACxE,iDAAiD;oBACjD,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBACjD,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,8BAA8B,EAAE,GAAG,EAAE;QAC5C,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YAC1D,wBAAwB;YACxB,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;YAElE,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC3B,0CAA0C;gBAC1C,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;gBAErC,+BAA+B;gBAC/B,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC;oBAC/C,WAAW,EAAE,SAAS;oBACtB,UAAU,EAAE,EAAE;iBACf,CAAC,CAAC;gBAEH,
|
|
1
|
+
{"version":3,"file":"extractor.integration.test.js","sourceRoot":"","sources":["../../src/git/extractor.integration.test.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEjD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAE1C,QAAQ,CAAC,gCAAgC,EAAE,GAAG,EAAE;IAC9C,IAAI,SAAuB,CAAC;IAC5B,MAAM,MAAM,GAAc,EAAE,GAAG,kBAAkB,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;IAErE,mDAAmD;IACnD,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAE/B,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,SAAS,GAAG,IAAI,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAE/C,mCAAmC;QACnC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,kBAAkB,EAAE,CAAC;QACpD,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,qDAAqD,CAAC,CAAC;QACzE,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,kBAAkB,EAAE,CAAC;YACpD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC;YACnE,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,kBAAkB,EAAE,CAAC;YACvD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;QAC3C,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;YAE/D,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;YAC1C,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC;YAE/C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,yCAAyC;gBACzC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;gBAE9C,+CAA+C;gBAC/C,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;gBAEnD,6BAA6B;gBAC7B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBAEhD,6BAA6B;gBAC7B,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;gBAE5C,uBAAuB;gBACvB,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;gBACzC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC;gBAE5C,8BAA8B;gBAC9B,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBAEjD,yDAAyD;gBACzD,uEAAuE;gBACvE,MAAM,cAAc,GAAG,kBAAkB,CAAC;gBAC1C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;gBAChD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;gBAClD,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;gBAEnD,wDAAwD;gBACxD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;gBACpD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;gBACnD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACvD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAEtD,qBAAqB;gBACrB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC/C,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAChC,MAAM,CAAC,OAAO,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBACnC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBACzC,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;YACxE,uCAAuC;YACvC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CACpC,KAAK,EACL,CAAC,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,EAC3C,EAAE,GAAG,EAAE,QAAQ,EAAE,CAClB,CAAC;YACF,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YAEhE,4BAA4B;YAC5B,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;YAE/D,uBAAuB;YACvB,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,yCAAyC;YACzC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,aAAa,CAC/C,KAAK,EACL,CAAC,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,EACvD,EAAE,GAAG,EAAE,QAAQ,EAAE,CAClB,CAAC;YAEF,0EAA0E;YAC1E,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC/B,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;gBAC/D,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEnE,0CAA0C;gBAC1C,MAAM,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBAEnD,wCAAwC;gBACxC,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;oBACtC,wEAAwE;oBACxE,iDAAiD;oBACjD,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBACjD,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,8BAA8B,EAAE,GAAG,EAAE;QAC5C,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YAC1D,wBAAwB;YACxB,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;YAElE,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC3B,0CAA0C;gBAC1C,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;gBAErC,+BAA+B;gBAC/B,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC;oBAC/C,WAAW,EAAE,SAAS;oBACtB,UAAU,EAAE,EAAE;iBACf,CAAC,CAAC;gBAEH,uCAAuC;gBACvC,MAAM,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;gBAC7D,MAAM,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;gBAEhD,2DAA2D;gBAC3D,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;oBACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1C,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;QAC7B,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACrD,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC,CAAC;YAE9D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBAE5D,yCAAyC;gBACzC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;gBACjC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;YAC7D,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,0CAA0C,CAAC,CAAC;YACvF,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACxB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACnC,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,mBAAmB,EAAE,CAAC;YAEnD,gBAAgB;YAChB,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;YAEvC,uCAAuC;YACvC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,CAAC,WAAW,EAAE,MAAM,CAAC,EAAE;gBACnE,GAAG,EAAE,QAAQ;aACd,CAAC,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;YAChD,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,cAAc,EAAE,CAAC;YAE/C,8BAA8B;YAC9B,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CACpC,KAAK,EACL,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,EAC/B,EAAE,GAAG,EAAE,QAAQ,EAAE,CAClB,CAAC;YACF,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;YAE/D,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBACxB,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;gBAClC,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;gBAExD,uCAAuC;gBACvC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CACpC,KAAK,EACL,CAAC,UAAU,EAAE,SAAS,EAAE,GAAG,SAAS,QAAQ,CAAC,EAC7C,EAAE,GAAG,EAAE,QAAQ,EAAE,CAClB,CAAC;gBACF,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;YAClD,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -84,4 +84,68 @@ describe("BM25SparseVectorGenerator", () => {
|
|
|
84
84
|
expect(value).toBeGreaterThan(0);
|
|
85
85
|
});
|
|
86
86
|
});
|
|
87
|
+
|
|
88
|
+
it("should produce deterministic indices across separate generator instances", () => {
|
|
89
|
+
// This is the core bug fix validation: two independent generators must
|
|
90
|
+
// assign the same index to the same token, so index-time and query-time
|
|
91
|
+
// sparse vectors are compatible.
|
|
92
|
+
const generator1 = new BM25SparseVectorGenerator();
|
|
93
|
+
const generator2 = new BM25SparseVectorGenerator();
|
|
94
|
+
|
|
95
|
+
const result1 = generator1.generate("hello world");
|
|
96
|
+
const result2 = generator2.generate("hello world");
|
|
97
|
+
|
|
98
|
+
expect(result1.indices).toEqual(result2.indices);
|
|
99
|
+
expect(result1.values).toEqual(result2.values);
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
it("should produce matching indices when query tokens are a subset of indexed tokens", () => {
|
|
103
|
+
// Simulates the real hybrid_search flow:
|
|
104
|
+
// 1. Index generator processes multiple documents (builds vocabulary)
|
|
105
|
+
// 2. Query generator is a fresh instance processing the query
|
|
106
|
+
// The query token indices must match the document token indices.
|
|
107
|
+
const indexGenerator = new BM25SparseVectorGenerator();
|
|
108
|
+
indexGenerator.generate("the quick brown fox jumps over the lazy dog");
|
|
109
|
+
indexGenerator.generate("machine learning is a subset of artificial intelligence");
|
|
110
|
+
indexGenerator.generate("sparse vectors enable keyword search in qdrant");
|
|
111
|
+
|
|
112
|
+
const queryGenerator = new BM25SparseVectorGenerator();
|
|
113
|
+
const queryResult = queryGenerator.generate("quick fox");
|
|
114
|
+
|
|
115
|
+
// Generate the same query with the index generator for comparison
|
|
116
|
+
const indexQueryResult = indexGenerator.generate("quick fox");
|
|
117
|
+
|
|
118
|
+
// Indices must be identical -- same tokens, same indices
|
|
119
|
+
const querySorted = [...queryResult.indices].sort((a, b) => a - b);
|
|
120
|
+
const indexSorted = [...indexQueryResult.indices].sort((a, b) => a - b);
|
|
121
|
+
expect(querySorted).toEqual(indexSorted);
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
it("should map different tokens to different indices (within hash collision tolerance)", () => {
|
|
125
|
+
const generator = new BM25SparseVectorGenerator();
|
|
126
|
+
// Use a set of clearly distinct tokens
|
|
127
|
+
const tokens = ["apple", "banana", "cherry", "dragon", "elephant",
|
|
128
|
+
"flamingo", "giraffe", "helicopter", "igloo", "jungle"];
|
|
129
|
+
|
|
130
|
+
const indices = new Set<number>();
|
|
131
|
+
for (const token of tokens) {
|
|
132
|
+
const result = generator.generate(token);
|
|
133
|
+
// Each single-token text produces exactly one index
|
|
134
|
+
expect(result.indices).toHaveLength(1);
|
|
135
|
+
indices.add(result.indices[0]);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// With 10 tokens and a 1M-size vocabulary space, collisions should be essentially zero.
|
|
139
|
+
expect(indices.size).toBe(tokens.length);
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
it("should generate indices within valid vocabulary range", () => {
|
|
143
|
+
const generator = new BM25SparseVectorGenerator();
|
|
144
|
+
const result = generator.generate("testing various words for index range validation");
|
|
145
|
+
|
|
146
|
+
for (const index of result.indices) {
|
|
147
|
+
expect(index).toBeGreaterThanOrEqual(0);
|
|
148
|
+
expect(index).toBeLessThan(1_000_000);
|
|
149
|
+
}
|
|
150
|
+
});
|
|
87
151
|
});
|
package/src/embeddings/sparse.ts
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
* BM25 Sparse Vector Generator
|
|
3
3
|
*
|
|
4
4
|
* This module provides a simple BM25-like sparse vector generation for keyword search.
|
|
5
|
+
* Uses deterministic hash-based vocabulary indices so that the same token always maps
|
|
6
|
+
* to the same index, regardless of when or where the generator is instantiated.
|
|
7
|
+
*
|
|
5
8
|
* For production use, consider using a proper BM25 implementation or Qdrant's built-in
|
|
6
9
|
* sparse vector generation via FastEmbed.
|
|
7
10
|
*/
|
|
@@ -12,21 +15,40 @@ interface TokenFrequency {
|
|
|
12
15
|
[token: string]: number;
|
|
13
16
|
}
|
|
14
17
|
|
|
18
|
+
/**
|
|
19
|
+
* Size of the hash-based vocabulary space.
|
|
20
|
+
* Tokens are mapped to indices in [0, VOCAB_SIZE) via deterministic hashing.
|
|
21
|
+
* 1M provides virtually zero hash collisions while adding no overhead
|
|
22
|
+
* since sparse vectors only store non-zero (index, value) pairs.
|
|
23
|
+
*/
|
|
24
|
+
const VOCAB_SIZE = 1_000_000;
|
|
25
|
+
|
|
15
26
|
export class BM25SparseVectorGenerator {
|
|
16
|
-
private vocabulary: Map<string, number>;
|
|
17
27
|
private idfScores: Map<string, number>;
|
|
18
28
|
private documentCount: number;
|
|
19
29
|
private k1: number;
|
|
20
30
|
private b: number;
|
|
21
31
|
|
|
22
32
|
constructor(k1: number = 1.2, b: number = 0.75) {
|
|
23
|
-
this.vocabulary = new Map();
|
|
24
33
|
this.idfScores = new Map();
|
|
25
34
|
this.documentCount = 0;
|
|
26
35
|
this.k1 = k1;
|
|
27
36
|
this.b = b;
|
|
28
37
|
}
|
|
29
38
|
|
|
39
|
+
/**
|
|
40
|
+
* Deterministically hash a token to a fixed vocabulary index.
|
|
41
|
+
* The same token will always produce the same index, regardless of
|
|
42
|
+
* generator instance or document processing order.
|
|
43
|
+
*/
|
|
44
|
+
private hashToken(token: string): number {
|
|
45
|
+
let hash = 0;
|
|
46
|
+
for (let i = 0; i < token.length; i++) {
|
|
47
|
+
hash = ((hash << 5) - hash + token.charCodeAt(i)) | 0;
|
|
48
|
+
}
|
|
49
|
+
return Math.abs(hash) % VOCAB_SIZE;
|
|
50
|
+
}
|
|
51
|
+
|
|
30
52
|
/**
|
|
31
53
|
* Tokenize text into words (simple whitespace tokenization + lowercase)
|
|
32
54
|
*/
|
|
@@ -51,7 +73,7 @@ export class BM25SparseVectorGenerator {
|
|
|
51
73
|
|
|
52
74
|
/**
|
|
53
75
|
* Build vocabulary from training documents (optional pre-training step)
|
|
54
|
-
*
|
|
76
|
+
* Computes IDF scores for more accurate BM25 scoring.
|
|
55
77
|
*/
|
|
56
78
|
train(documents: string[]): void {
|
|
57
79
|
this.documentCount = documents.length;
|
|
@@ -63,9 +85,6 @@ export class BM25SparseVectorGenerator {
|
|
|
63
85
|
const uniqueTokens = new Set(tokens);
|
|
64
86
|
|
|
65
87
|
for (const token of uniqueTokens) {
|
|
66
|
-
if (!this.vocabulary.has(token)) {
|
|
67
|
-
this.vocabulary.set(token, this.vocabulary.size);
|
|
68
|
-
}
|
|
69
88
|
documentFrequency.set(token, (documentFrequency.get(token) || 0) + 1);
|
|
70
89
|
}
|
|
71
90
|
}
|
|
@@ -86,18 +105,12 @@ export class BM25SparseVectorGenerator {
|
|
|
86
105
|
const tf = this.getTermFrequency(tokens);
|
|
87
106
|
const docLength = tokens.length;
|
|
88
107
|
|
|
89
|
-
|
|
90
|
-
const
|
|
108
|
+
// Use a map to accumulate scores per index, handling potential hash collisions
|
|
109
|
+
const indexScores = new Map<number, number>();
|
|
91
110
|
|
|
92
111
|
// Calculate BM25 score for each term
|
|
93
112
|
for (const [token, freq] of Object.entries(tf)) {
|
|
94
|
-
|
|
95
|
-
if (!this.vocabulary.has(token)) {
|
|
96
|
-
// For unseen tokens, add them to vocabulary dynamically
|
|
97
|
-
this.vocabulary.set(token, this.vocabulary.size);
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
const index = this.vocabulary.get(token)!;
|
|
113
|
+
const index = this.hashToken(token);
|
|
101
114
|
|
|
102
115
|
// Use a default IDF if not trained
|
|
103
116
|
const idf = this.idfScores.get(token) || 1.0;
|
|
@@ -108,11 +121,18 @@ export class BM25SparseVectorGenerator {
|
|
|
108
121
|
const score = idf * (numerator / denominator);
|
|
109
122
|
|
|
110
123
|
if (score > 0) {
|
|
111
|
-
indices
|
|
112
|
-
|
|
124
|
+
// Sum scores for colliding hash indices
|
|
125
|
+
indexScores.set(index, (indexScores.get(index) || 0) + score);
|
|
113
126
|
}
|
|
114
127
|
}
|
|
115
128
|
|
|
129
|
+
const indices: number[] = [];
|
|
130
|
+
const values: number[] = [];
|
|
131
|
+
for (const [index, score] of indexScores.entries()) {
|
|
132
|
+
indices.push(index);
|
|
133
|
+
values.push(score);
|
|
134
|
+
}
|
|
135
|
+
|
|
116
136
|
return { indices, values };
|
|
117
137
|
}
|
|
118
138
|
|
|
@@ -148,13 +148,13 @@ describe("GitExtractor Integration Tests", () => {
|
|
|
148
148
|
maxCommits: 20,
|
|
149
149
|
});
|
|
150
150
|
|
|
151
|
-
// Should have fewer commits
|
|
151
|
+
// Should have fewer commits than total
|
|
152
152
|
expect(recentCommits.length).toBeLessThan(allCommits.length);
|
|
153
|
-
expect(recentCommits.length).
|
|
153
|
+
expect(recentCommits.length).toBeGreaterThan(0);
|
|
154
154
|
|
|
155
|
-
// Verify
|
|
156
|
-
for (
|
|
157
|
-
expect(
|
|
155
|
+
// Verify all returned commits are newer than the sinceHash
|
|
156
|
+
for (const commit of recentCommits) {
|
|
157
|
+
expect(commit.hash).not.toBe(sinceHash);
|
|
158
158
|
}
|
|
159
159
|
}
|
|
160
160
|
});
|
|
@@ -214,7 +214,13 @@ describe("GitExtractor Integration Tests", () => {
|
|
|
214
214
|
const sinceHash = commits[4].hash;
|
|
215
215
|
const count = await extractor.getCommitCount(sinceHash);
|
|
216
216
|
|
|
217
|
-
|
|
217
|
+
// Verify against git rev-list directly
|
|
218
|
+
const { stdout } = await execFileAsync(
|
|
219
|
+
"git",
|
|
220
|
+
["rev-list", "--count", `${sinceHash}..HEAD`],
|
|
221
|
+
{ cwd: repoPath },
|
|
222
|
+
);
|
|
223
|
+
expect(count).toBe(parseInt(stdout.trim(), 10));
|
|
218
224
|
}
|
|
219
225
|
});
|
|
220
226
|
});
|