@equinor/fusion-framework-cli-plugin-ai-index 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/CHANGELOG.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +114 -0
  4. package/dist/esm/bin/apply-metadata.js +63 -0
  5. package/dist/esm/bin/apply-metadata.js.map +1 -0
  6. package/dist/esm/bin/delete-removed-files.js +36 -0
  7. package/dist/esm/bin/delete-removed-files.js.map +1 -0
  8. package/dist/esm/bin/embed.js +196 -0
  9. package/dist/esm/bin/embed.js.map +1 -0
  10. package/dist/esm/bin/execute-pipeline.js +40 -0
  11. package/dist/esm/bin/execute-pipeline.js.map +1 -0
  12. package/dist/esm/bin/file-stream.js +22 -0
  13. package/dist/esm/bin/file-stream.js.map +1 -0
  14. package/dist/esm/bin/get-diff.js +29 -0
  15. package/dist/esm/bin/get-diff.js.map +1 -0
  16. package/dist/esm/bin/index.js +2 -0
  17. package/dist/esm/bin/index.js.map +1 -0
  18. package/dist/esm/bin/types.js +2 -0
  19. package/dist/esm/bin/types.js.map +1 -0
  20. package/dist/esm/command.js +82 -0
  21. package/dist/esm/command.js.map +1 -0
  22. package/dist/esm/command.options.js +48 -0
  23. package/dist/esm/command.options.js.map +1 -0
  24. package/dist/esm/config.js +2 -0
  25. package/dist/esm/config.js.map +1 -0
  26. package/dist/esm/index.js +13 -0
  27. package/dist/esm/index.js.map +1 -0
  28. package/dist/esm/utils/generate-chunk-id.js +18 -0
  29. package/dist/esm/utils/generate-chunk-id.js.map +1 -0
  30. package/dist/esm/utils/git/file-changes.js +196 -0
  31. package/dist/esm/utils/git/file-changes.js.map +1 -0
  32. package/dist/esm/utils/git/git-client.js +39 -0
  33. package/dist/esm/utils/git/git-client.js.map +1 -0
  34. package/dist/esm/utils/git/index.js +9 -0
  35. package/dist/esm/utils/git/index.js.map +1 -0
  36. package/dist/esm/utils/git/metadata.js +41 -0
  37. package/dist/esm/utils/git/metadata.js.map +1 -0
  38. package/dist/esm/utils/git/status.js +34 -0
  39. package/dist/esm/utils/git/status.js.map +1 -0
  40. package/dist/esm/utils/git/types.js +2 -0
  41. package/dist/esm/utils/git/types.js.map +1 -0
  42. package/dist/esm/utils/markdown/index.js +3 -0
  43. package/dist/esm/utils/markdown/index.js.map +1 -0
  44. package/dist/esm/utils/markdown/parser.js +72 -0
  45. package/dist/esm/utils/markdown/parser.js.map +1 -0
  46. package/dist/esm/utils/markdown/types.js +2 -0
  47. package/dist/esm/utils/markdown/types.js.map +1 -0
  48. package/dist/esm/utils/package-resolver.js +40 -0
  49. package/dist/esm/utils/package-resolver.js.map +1 -0
  50. package/dist/esm/utils/ts-doc/constants.js +13 -0
  51. package/dist/esm/utils/ts-doc/constants.js.map +1 -0
  52. package/dist/esm/utils/ts-doc/extractors.js +175 -0
  53. package/dist/esm/utils/ts-doc/extractors.js.map +1 -0
  54. package/dist/esm/utils/ts-doc/index.js +3 -0
  55. package/dist/esm/utils/ts-doc/index.js.map +1 -0
  56. package/dist/esm/utils/ts-doc/parser.js +37 -0
  57. package/dist/esm/utils/ts-doc/parser.js.map +1 -0
  58. package/dist/esm/utils/ts-doc/types.js +2 -0
  59. package/dist/esm/utils/ts-doc/types.js.map +1 -0
  60. package/dist/esm/utils/types.js +2 -0
  61. package/dist/esm/utils/types.js.map +1 -0
  62. package/dist/esm/version.js +3 -0
  63. package/dist/esm/version.js.map +1 -0
  64. package/dist/tsconfig.tsbuildinfo +1 -0
  65. package/dist/types/bin/apply-metadata.d.ts +1 -0
  66. package/dist/types/bin/delete-removed-files.d.ts +1 -0
  67. package/dist/types/bin/embed.d.ts +1 -0
  68. package/dist/types/bin/execute-pipeline.d.ts +1 -0
  69. package/dist/types/bin/file-stream.d.ts +1 -0
  70. package/dist/types/bin/get-diff.d.ts +1 -0
  71. package/dist/types/bin/index.d.ts +1 -0
  72. package/dist/types/bin/types.d.ts +1 -0
  73. package/dist/types/command.d.ts +2 -0
  74. package/dist/types/command.options.d.ts +62 -0
  75. package/dist/types/config.d.ts +33 -0
  76. package/dist/types/index.d.ts +8 -0
  77. package/dist/types/utils/generate-chunk-id.d.ts +8 -0
  78. package/dist/types/utils/git/file-changes.d.ts +21 -0
  79. package/dist/types/utils/git/git-client.d.ts +17 -0
  80. package/dist/types/utils/git/index.d.ts +5 -0
  81. package/dist/types/utils/git/metadata.d.ts +7 -0
  82. package/dist/types/utils/git/status.d.ts +12 -0
  83. package/dist/types/utils/git/types.d.ts +33 -0
  84. package/dist/types/utils/markdown/index.d.ts +2 -0
  85. package/dist/types/utils/markdown/parser.d.ts +21 -0
  86. package/dist/types/utils/markdown/types.d.ts +11 -0
  87. package/dist/types/utils/package-resolver.d.ts +14 -0
  88. package/dist/types/utils/ts-doc/constants.d.ts +5 -0
  89. package/dist/types/utils/ts-doc/extractors.d.ts +28 -0
  90. package/dist/types/utils/ts-doc/index.d.ts +2 -0
  91. package/dist/types/utils/ts-doc/parser.d.ts +23 -0
  92. package/dist/types/utils/ts-doc/types.d.ts +20 -0
  93. package/dist/types/utils/types.d.ts +17 -0
  94. package/dist/types/version.d.ts +1 -0
  95. package/package.json +72 -0
  96. package/src/bin/apply-metadata.ts +77 -0
  97. package/src/bin/delete-removed-files.ts +49 -0
  98. package/src/bin/embed.ts +262 -0
  99. package/src/bin/execute-pipeline.ts +48 -0
  100. package/src/bin/file-stream.ts +34 -0
  101. package/src/bin/get-diff.ts +33 -0
  102. package/src/bin/index.ts +1 -0
  103. package/src/bin/types.ts +48 -0
  104. package/src/command.options.ts +58 -0
  105. package/src/command.ts +100 -0
  106. package/src/config.ts +39 -0
  107. package/src/index.ts +19 -0
  108. package/src/utils/generate-chunk-id.ts +17 -0
  109. package/src/utils/git/file-changes.ts +213 -0
  110. package/src/utils/git/git-client.ts +43 -0
  111. package/src/utils/git/index.ts +19 -0
  112. package/src/utils/git/metadata.ts +47 -0
  113. package/src/utils/git/status.ts +48 -0
  114. package/src/utils/git/types.ts +36 -0
  115. package/src/utils/markdown/index.ts +5 -0
  116. package/src/utils/markdown/parser.ts +92 -0
  117. package/src/utils/markdown/types.ts +20 -0
  118. package/src/utils/package-resolver.ts +44 -0
  119. package/src/utils/ts-doc/constants.ts +13 -0
  120. package/src/utils/ts-doc/extractors.ts +246 -0
  121. package/src/utils/ts-doc/index.ts +5 -0
  122. package/src/utils/ts-doc/parser.ts +51 -0
  123. package/src/utils/ts-doc/types.ts +26 -0
  124. package/src/utils/types.ts +18 -0
  125. package/src/version.ts +2 -0
  126. package/tsconfig.json +27 -0
  127. package/vitest.config.ts +14 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,63 @@
1
+ # @equinor/fusion-framework-cli-plugin-ai-index
2
+
3
+ ## 1.0.0
4
+
5
+ ### Major Changes
6
+
7
+ - [`e2d2a76`](https://github.com/equinor/fusion-framework/commit/e2d2a76d08b86c3a9d8783fed1606551df9d5633) Thanks [@odinr](https://github.com/odinr)! - Add new AI indexing plugin package for document embedding and chunking utilities.
8
+
9
+ This plugin extends the Fusion Framework CLI with comprehensive document embedding and indexing capabilities for building searchable vector stores from code and documentation.
10
+
11
+ **Features:**
12
+
13
+ - Document chunking and embedding generation
14
+ - Git metadata extraction for context-aware embeddings
15
+ - Markdown/MDX and TypeScript/TSX documentation parsing
16
+ - File change tracking (new, modified, removed) via git diff
17
+ - Batch operations for efficient vector store updates
18
+ - Package metadata resolution
19
+ - TSDoc extraction utilities
20
+ - Dry-run mode for testing
21
+
22
+ **Quick Usage:**
23
+
24
+ 1. Install the plugin:
25
+
26
+ ```sh
27
+ pnpm add -D @equinor/fusion-framework-cli-plugin-ai-index
28
+ ```
29
+
30
+ 2. Configure in `fusion-cli.config.ts`:
31
+
32
+ ```typescript
33
+ import { defineFusionCli } from "@equinor/fusion-framework-cli";
34
+
35
+ export default defineFusionCli(() => ({
36
+ plugins: ["@equinor/fusion-framework-cli-plugin-ai-index"],
37
+ }));
38
+ ```
39
+
40
+ 3. Use the embeddings command:
41
+
42
+ ```sh
43
+ # Process all TypeScript and Markdown files
44
+ ffc ai embeddings "*.ts" "*.md" "*.mdx"
45
+
46
+ # Dry-run to preview what would be processed
47
+ ffc ai embeddings --dry-run ./src
48
+
49
+ # Process only changed files (git diff mode)
50
+ ffc ai embeddings --diff
51
+
52
+ # Clean and re-index all documents
53
+ ffc ai embeddings --clean "*.ts"
54
+ ```
55
+
56
+ The plugin supports Azure OpenAI and Azure Cognitive Search configuration via command-line options or environment variables. Configure file patterns and chunking options via `fusion-ai.config.ts`.
57
+
58
+ ### Patch Changes
59
+
60
+ - Updated dependencies [[`e2d2a76`](https://github.com/equinor/fusion-framework/commit/e2d2a76d08b86c3a9d8783fed1606551df9d5633), [`e2d2a76`](https://github.com/equinor/fusion-framework/commit/e2d2a76d08b86c3a9d8783fed1606551df9d5633), [`e2d2a76`](https://github.com/equinor/fusion-framework/commit/e2d2a76d08b86c3a9d8783fed1606551df9d5633)]:
61
+ - @equinor/fusion-framework-cli-plugin-ai-base@1.0.0
62
+ - @equinor/fusion-framework-cli@13.0.0
63
+ - @equinor/fusion-framework-module-ai@2.0.0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Equinor
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,114 @@
1
+ # @equinor/fusion-framework-cli-plugin-ai-index
2
+
3
+ AI indexing plugin for Fusion Framework CLI providing document embedding and chunking utilities.
4
+
5
+ ## Installation
6
+
7
+ ```sh
8
+ pnpm add -D @equinor/fusion-framework-cli-plugin-ai-index
9
+ ```
10
+
11
+ ## Configuration
12
+
13
+ After installing the plugin, create a `fusion-cli.config.ts` file in your project root:
14
+
15
+ ```typescript
16
+ import { defineFusionCli } from '@equinor/fusion-framework-cli';
17
+
18
+ export default defineFusionCli(() => ({
19
+ plugins: [
20
+ '@equinor/fusion-framework-cli-plugin-ai-index',
21
+ ],
22
+ }));
23
+ ```
24
+
25
+ The CLI will automatically discover and load plugins listed in this configuration file. The config file can be `.ts`, `.js`, or `.json`. The `defineFusionCli` helper provides type safety and IntelliSense support.
26
+
27
+ ## Features
28
+
29
+ This plugin extends the Fusion Framework CLI with AI indexing capabilities:
30
+
31
+ - **Document embedding** and chunking utilities
32
+ - Markdown/MDX document chunking with frontmatter extraction
33
+ - TypeScript/TSX TSDoc extraction and chunking
34
+ - Glob pattern support for file collection
35
+ - Git diff-based processing for workflow integration
36
+ - Dry-run mode for testing without actual processing
37
+
38
+ ## Usage
39
+
40
+ Once installed, the embeddings command is automatically available:
41
+
42
+ ```sh
43
+ # Generate embeddings from documents
44
+ ffc ai embeddings ./src
45
+ ```
46
+
47
+ ## Commands
48
+
49
+ ### `ai embeddings`
50
+
51
+ Document embedding utilities for Large Language Model processing.
52
+
53
+ **Features:**
54
+ - Markdown/MDX document chunking with frontmatter extraction
55
+ - TypeScript/TSX TSDoc extraction and chunking
56
+ - Glob pattern support for file collection
57
+ - Git diff-based processing for workflow integration
58
+ - Dry-run mode for testing without actual processing
59
+ - Configurable file patterns via fusion-ai.config.ts
60
+
61
+ **Options:**
62
+ - `--dry-run` - Show what would be processed without actually doing it
63
+ - `--config <config>` - Path to a config file (default: fusion-ai.config.ts)
64
+ - `--diff` - Process only changed files (workflow mode)
65
+ - `--base-ref <ref>` - Git reference to compare against (default: HEAD~1)
66
+ - `--clean` - Delete all existing documents from the vector store before processing
67
+ - `--openai-api-key <key>` - API key for Azure OpenAI
68
+ - `--openai-api-version <version>` - API version (default: 2024-02-15-preview)
69
+ - `--openai-instance <name>` - Azure OpenAI instance name
70
+ - `--openai-embedding-deployment <name>` - Azure OpenAI embedding deployment name
71
+ - `--azure-search-endpoint <url>` - Azure Search endpoint URL
72
+ - `--azure-search-api-key <key>` - Azure Search API key
73
+ - `--azure-search-index-name <name>` - Azure Search index name
74
+
75
+ **Examples:**
76
+ ```sh
77
+ $ ffc ai embeddings --dry-run ./src
78
+ $ ffc ai embeddings "*.ts" "*.md" "*.mdx"
79
+ $ ffc ai embeddings --diff
80
+ $ ffc ai embeddings --diff --base-ref origin/main
81
+ $ ffc ai embeddings --clean "*.ts"
82
+ ```
83
+
84
+ ## Azure and indexing configuration
85
+
86
+ The plugin requires Azure OpenAI and Azure Cognitive Search configuration. See the main CLI documentation for details on setting up API keys and endpoints.
87
+
88
+ You can also create a `fusion-ai.config.ts` file to configure file patterns and metadata processing:
89
+
90
+ ```typescript
91
+ import { configureFusionAI } from '@equinor/fusion-framework-cli-plugin-ai-index';
92
+ import type { FusionAIConfigWithIndex } from '@equinor/fusion-framework-cli-plugin-ai-index';
93
+
94
+ export default configureFusionAI((): FusionAIConfigWithIndex => ({
95
+ index: {
96
+ patterns: ['**/*.ts', '**/*.md', '**/*.mdx'],
97
+ metadata: {
98
+ attributeProcessor: (attributes, document) => {
99
+ // Custom metadata processing
100
+ return attributes;
101
+ },
102
+ },
103
+ embedding: {
104
+ chunkSize: 1000,
105
+ chunkOverlap: 200,
106
+ },
107
+ },
108
+ }));
109
+ ```
110
+
111
+ ## License
112
+
113
+ MIT
114
+
@@ -0,0 +1,63 @@
1
+ import path from 'node:path';
2
+ import { from, mergeMap, map, toArray } from 'rxjs';
3
+ import { extractGitMetadata } from '../utils/git/index.js';
4
+ import { resolvePackage } from '../utils/package-resolver.js';
/**
 * Creates a stream that applies metadata to documents.
 *
 * For every entry emitted by `document$`, each document in `entry.documents`
 * is enriched with:
 *  - git metadata from `extractGitMetadata`, unless
 *    `indexConfig.metadata.resolveGit` is exactly `false`;
 *  - package fields (`pkg_name`, `pkg_version`, `pkg_keywords`) from
 *    `resolvePackage`, when `indexConfig.metadata.resolvePackage` is set.
 * The merged attributes are then passed through the optional
 * `indexConfig.metadata.attributeProcessor`.
 *
 * Documents without a `metadata.source` are passed through without git or
 * package lookups.
 *
 * @param document$ - Observable of entries that expose a `documents` array.
 * @param indexConfig - Optional index configuration; only `metadata` is read.
 * @returns Observable emitting one array of enriched documents per input entry.
 * @internal
 */
export function applyMetadata(document$, indexConfig) {
  // Package resolution is opt-in
  const shouldResolvePackage = indexConfig?.metadata?.resolvePackage ?? false;
  return document$.pipe(
    mergeMap((entry) =>
      from(entry.documents).pipe(
        // Extract git/package metadata concurrently for all documents of the entry
        mergeMap(async (document) => {
          const { source, rootPath } = document.metadata;
          // path.join throws a TypeError for non-string segments, so the path is
          // only built once we know the document actually has a source.
          const sourcePath = source ? path.join(rootPath ?? process.cwd(), source) : undefined;
          const gitMetadata =
            sourcePath && indexConfig?.metadata?.resolveGit !== false
              ? await extractGitMetadata(sourcePath)
              : {};
          // Resolve package information if enabled
          let packageMetadata = {};
          if (shouldResolvePackage && sourcePath) {
            packageMetadata = await resolvePackage(sourcePath)
              .then((pkg) => ({
                pkg_name: pkg?.name,
                pkg_version: pkg?.version,
                pkg_keywords: pkg?.keywords,
              }))
              // Best effort: a failed package lookup must not drop the document
              .catch(() => ({}));
          }
          return {
            ...document,
            metadata: {
              ...document.metadata,
              attributes: {
                ...document.metadata.attributes,
                ...gitMetadata,
                ...packageMetadata,
              },
            },
          };
        }),
        // Apply custom attribute processor from config (identity when absent)
        map((document) => {
          const attributeProcessor =
            indexConfig?.metadata?.attributeProcessor || ((attributes, _document) => attributes);
          const attributes = attributeProcessor(document.metadata.attributes ?? {}, document);
          return {
            ...document,
            metadata: {
              ...document.metadata,
              attributes,
            },
          };
        }),
        // Group back by file for batch deletion in next step
        toArray(),
      ),
    ),
  );
}
63
+ //# sourceMappingURL=apply-metadata.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"apply-metadata.js","sourceRoot":"","sources":["../../../src/bin/apply-metadata.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAGpD,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAC3D,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAI9D;;;GAGG;AACH,MAAM,UAAU,aAAa,CAC3B,SAAoC,EACpC,WAA6C;IAE7C,8BAA8B;IAC9B,MAAM,oBAAoB,GAAG,WAAW,EAAE,QAAQ,EAAE,cAAc,IAAI,KAAK,CAAC;IAE5E,OAAO,SAAS,CAAC,IAAI,CACnB,QAAQ,CAAC,CAAC,KAAK,EAAE,EAAE;QACjB,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,IAAI;QAC/B,sDAAsD;QACtD,QAAQ,CAAC,KAAK,EAAE,QAAQ,EAAgC,EAAE;YACxD,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,QAAQ,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;YAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACjE,MAAM,WAAW,GACf,QAAQ,CAAC,QAAQ,CAAC,MAAM,IAAI,WAAW,EAAE,QAAQ,EAAE,UAAU,KAAK,KAAK;gBACrE,CAAC,CAAC,MAAM,kBAAkB,CAAC,UAAU,CAAC;gBACtC,CAAC,CAAC,EAAE,CAAC;YAET,yCAAyC;YACzC,IAAI,eAAe,GAAG,EAAE,CAAC;YACzB,IAAI,oBAAoB,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;gBACrD,eAAe,GAAG,MAAM,cAAc,CAAC,UAAU,CAAC;qBAC/C,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;oBACZ,OAAO;wBACL,QAAQ,EAAE,GAAG,EAAE,IAAI;wBACnB,WAAW,EAAE,GAAG,EAAE,OAAO;wBACzB,YAAY,EAAE,GAAG,EAAE,QAAQ;qBAC5B,CAAC;gBACJ,CAAC,CAAC;qBACD,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACvB,CAAC;YACD,OAAO;gBACL,GAAG,QAAQ;gBACX,QAAQ,EAAE;oBACR,GAAG,QAAQ,CAAC,QAAQ;oBACpB,UAAU,EAAE;wBACV,GAAG,QAAQ,CAAC,QAAQ,CAAC,UAAU;wBAC/B,GAAG,WAAW;wBACd,GAAG,eAAe;qBACnB;iBACF;aACF,CAAC;QACJ,CAAC,CAAC;QACF,+CAA+C;QAC/C,GAAG,CAAC,CAAC,QAA6B,EAAE,EAAE;YACpC,MAAM,kBAAkB,GACtB,WAAW,EAAE,QAAQ,EAAE,kBAAkB;gBACzC,CAAC,CAAC,UAAmC,EAAE,SAA8B,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC;YACxF,MAAM,UAAU,GAAG,kBAAkB,CAAC,QAAQ,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,EAAE,QAAQ,CAAC,CAAC;YACpF,OAAO;gBACL,GAAG,QAAQ;gBACX,QAAQ,EAAE;oBACR,GAAG,QAAQ,CAAC,QAAQ;oBACpB,UAAU;iBACX;aACF,CAAC;QACJ,CAAC,CAAC;QACF,qDAAqD;QACrD,OAAO,EAAE,CACV,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;AACJ,CAAC"}
@@ -0,0 +1,36 @@
1
+ import { map, mergeMap, toArray, filter } from 'rxjs';
/**
 * Creates a stream that deletes removed files from the vector store.
 *
 * All files emitted by `removedFiles$` are collected and removed with a single
 * `deleteDocuments` call whose OData filter matches every `relativePath`
 * (one file can produce multiple document chunks). Nothing is emitted when no
 * files were removed. With `options.dryRun` set, the files are logged and
 * reported but the vector store is not touched.
 *
 * @param removedFiles$ - Observable of file entries exposing `relativePath`.
 * @param framework - Framework instance; `framework.ai.getService` is used to
 *   obtain the search service.
 * @param options - Command options; reads `dryRun` and `azureSearchIndexName`.
 * @returns Observable emitting at most one `{ status: 'deleted', files }` result.
 * @internal
 */
export function createDeleteRemovedFilesStream(removedFiles$, framework, options) {
  // OData string literals are single-quoted; an embedded quote is escaped by doubling it.
  const toODataString = (value) => `'${String(value).replace(/'/g, "''")}'`;
  return removedFiles$.pipe(
    toArray(),
    // Nothing removed -> nothing to delete and nothing to report
    filter((files) => files.length > 0),
    mergeMap(async (files) => {
      for (const file of files) {
        console.log('Removing entry from vector store', file.relativePath);
      }
      if (!options.dryRun) {
        // Build OData filter: "metadata/source eq 'path1' or metadata/source eq 'path2'"
        const filterExpression = files
          .map((file) => `metadata/source eq ${toODataString(file.relativePath)}`)
          .join(' or ');
        const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
        // Single batch deletion - one file can produce multiple document chunks
        await vectorStoreService.deleteDocuments({
          filter: { filterExpression },
        });
      }
      return {
        status: 'deleted',
        files,
      };
    }),
  );
}
36
+ //# sourceMappingURL=delete-removed-files.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"delete-removed-files.js","sourceRoot":"","sources":["../../../src/bin/delete-removed-files.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,CAAC;AAMtD;;;GAGG;AACH,MAAM,UAAU,8BAA8B,CAC5C,aAAwC,EACxC,SAA4B,EAC5B,OAAuB;IAEvB,OAAO,aAAa,CAAC,IAAI,CACvB,OAAO,EAAE,EACT,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;QACZ,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,gBAAgB,EAAE,IAAI,EAAE,CAAC;QAC/C,CAAC;QACD,iFAAiF;QACjF,MAAM,gBAAgB,GAAG,KAAK;aAC3B,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,uBAAuB,IAAI,CAAC,YAAY,GAAG,CAAC;aAC1D,IAAI,CAAC,MAAM,CAAC,CAAC;QAChB,OAAO,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC;IACrC,CAAC,CAAC,EACF,QAAQ,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,EAAE,EAAE;QAC7C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,OAAO,CAAC,GAAG,CAAC,kCAAkC,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;QACrE,CAAC;QACD,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YACpB,MAAM,kBAAkB,GAAG,SAAS,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;YAC3F,wEAAwE;YACxE,MAAM,kBAAkB,CAAC,eAAe,CAAC;gBACvC,MAAM,EAAE,EAAE,gBAAgB,EAAE,gBAAgB,IAAI,SAAS,EAAE;aAC5D,CAAC,CAAC;QACL,CAAC;QACD,OAAO;YACL,MAAM,EAAE,SAAS;YACjB,KAAK,EAAE,KAAmC;SAC3C,CAAC;IACJ,CAAC,CAAC,EACF,MAAM,CAAC,CAAC,MAAM,EAAsC,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CACxE,CAAC;AACJ,CAAC"}
@@ -0,0 +1,196 @@
1
+ import { globbyStream } from 'globby';
2
+ import { relative } from 'node:path';
3
+ import multimatch from 'multimatch';
4
+ import { concat, from, merge } from 'rxjs';
5
+ import { concatMap, filter, map, mergeMap, shareReplay, toArray } from 'rxjs/operators';
6
+ import { isMarkdownFile, parseMarkdownFile } from '../utils/markdown/index.js';
7
+ import { getFileStatus, resolveProjectRoot } from '../utils/git/index.js';
8
+ import { isTypescriptFile, parseTsDocFromFileSync } from '../utils/ts-doc/index.js';
9
+ import { getDiff } from './get-diff.js';
10
+ import { createDeleteRemovedFilesStream } from './delete-removed-files.js';
11
+ import { applyMetadata } from './apply-metadata.js';
12
+ import { readFileSync } from 'node:fs';
13
+ import { generateChunkId } from '../utils/generate-chunk-id.js';
/**
 * Default directories to skip before expensive git operations.
 * These are common build artifacts and dependencies that should be ignored.
 * @internal
 */
const defaultIgnore = ['node_modules', '**/node_modules/**', 'dist', '**/dist/**', '.git'];
/**
 * Main entry point for the embeddings bin.
 * Orchestrates the entire embeddings generation pipeline:
 *  1. optionally wipes the vector store (`options.clean`, skipped on dry-run);
 *  2. collects files, either from the git diff (`options.diff`) or by globbing
 *     `filePatterns`, and keeps those matching `config.index.patterns`;
 *  3. deletes documents of removed files from the vector store;
 *  4. parses new/modified files (raw, markdown, TypeScript), applies metadata,
 *     generates embeddings and adds the documents to the vector store.
 *
 * The pipeline is started with `subscribe`, so the returned promise resolves
 * before the pipeline finishes; the process is terminated via `process.exit`
 * on completion (code 0) or on error (code 1).
 *
 * @param binOptions - Object with `framework`, `options`, `config` and `filePatterns`.
 * @returns Promise that resolves once the pipeline has been started.
 * @internal
 */
export async function embed(binOptions) {
  const { framework, options, config, filePatterns } = binOptions;
  // OData string literals are single-quoted; an embedded quote is escaped by doubling it.
  const toODataString = (value) => `'${String(value).replace(/'/g, "''")}'`;
  const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);

  // Handle clean operation (destructive - deletes all existing documents)
  if (options.clean && !options.dryRun) {
    console.log('🧹 Cleaning vector store: deleting all existing documents...');
    // OData filter: delete all documents with non-empty source (all indexed docs)
    await vectorStoreService.deleteDocuments({
      filter: { filterExpression: "metadata/source ne ''" },
    });
    console.log('✅ Vector store cleaned successfully');
  }

  // Handle diff-based processing (workflow mode)
  const changedFiles = options.diff ? await getDiff(options) : [];

  // Create file stream: diff mode uses git changes, normal mode uses globby
  const files$ = (() => {
    if (options.diff) {
      return from(changedFiles);
    }
    // Directories to skip before expensive git operations.
    // Note: Even with gitignore: true, globby still traverses ignored directories when .gitignore
    // contains negation patterns (like !.yarn/releases), so we add explicit ignore patterns
    // to prevent traversing these directories entirely.
    const ignore = config.index?.ignore ?? defaultIgnore;
    return from(
      globbyStream(filePatterns, {
        ignore,
        onlyFiles: true,
        gitignore: true,
        absolute: true,
      }),
    ).pipe(
      // Get git status concurrently, then flatten array results
      mergeMap((path) => getFileStatus(path)),
      concatMap((files) => from(files)),
      // Share stream for multiple subscribers (removedFiles$ and indexFiles$)
      shareReplay({ refCount: true }),
    );
  })();

  // Only files matching these patterns are indexed or deleted
  const allowedFilePatterns = config.index?.patterns ?? [
    '**/*.ts',
    '**/*.tsx',
    '**/*.md',
    '**/*.mdx',
  ];

  // Process files: enrich with metadata and filter by allowed patterns
  const processedFiles$ = files$.pipe(
    map(({ filepath, status }) => {
      const projectRoot = resolveProjectRoot(filepath);
      const relativePath = projectRoot ? relative(projectRoot, filepath) : filepath;
      return {
        path: filepath,
        status,
        projectRoot,
        relativePath,
      };
    }),
    filter((file) => multimatch(file.relativePath, allowedFilePatterns).length > 0),
    // Share for multiple subscribers (removedFiles$, markdown$, typescript$)
    shareReplay({ refCount: true }),
  );

  // Split stream: removed files for deletion, new/modified for indexing
  const removedFiles$ = processedFiles$.pipe(filter((file) => file.status === 'removed'));
  const delete$ = createDeleteRemovedFilesStream(removedFiles$, framework, options);

  // New/modified files for indexing
  const indexFiles$ = processedFiles$.pipe(
    filter((file) => file.status === 'new' || file.status === 'modified'),
    // Share for the raw, markdown and typescript pipelines
    shareReplay({ refCount: true }),
  );

  // Raw files are indexed verbatim as a single document instead of being parsed
  const isRawFile = (file) =>
    multimatch(file.relativePath, config.index?.rawPatterns ?? []).length > 0;

  const rawFiles$ = indexFiles$.pipe(
    filter(isRawFile),
    map((file) => {
      const document = {
        id: generateChunkId(file.relativePath),
        pageContent: readFileSync(file.path, 'utf8'),
        metadata: {
          source: file.relativePath,
          type: 'raw',
        },
      };
      return { status: file.status, documents: [document] };
    }),
  );

  const markdown$ = indexFiles$.pipe(
    filter((file) => !isRawFile(file)),
    filter((file) => isMarkdownFile(file.path)),
    mergeMap(async (file) => {
      const documents = await parseMarkdownFile(file);
      return { status: file.status, documents };
    }),
  );

  const typescript$ = indexFiles$.pipe(
    filter((file) => !isRawFile(file)),
    filter((file) => isTypescriptFile(file.path)),
    map((file) => {
      const documents = parseTsDocFromFileSync(file);
      return { status: file.status, documents };
    }),
  );

  // Apply metadata to documents
  const applyMetadata$ = applyMetadata(merge(rawFiles$, markdown$, typescript$), config.index);

  // Generate embeddings
  const embeddingService = framework.ai.getService('embeddings', options.openaiEmbeddingDeployment);
  const applyEmbedding$ = applyMetadata$.pipe(
    mergeMap((documents) =>
      from(documents).pipe(
        mergeMap(async (document) => {
          console.log('embedding document', document.metadata.source);
          const embeddings = await embeddingService
            .embedQuery(document.pageContent)
            .catch((error) => {
              // An embedding failure aborts the whole run
              console.error(`❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
              console.error('document', document);
              process.exit(1);
            });
          const metadata = { ...document.metadata, embedding: embeddings };
          return { ...document, metadata };
        }),
        toArray(),
      ),
    ),
  );

  // Update vector store
  const upsert$ = applyEmbedding$.pipe(
    mergeMap(async (documents) => {
      if (documents.length === 0) {
        return undefined;
      }
      for (const document of documents) {
        console.log(`Adding entry [${document.id}] to vector store`, document.metadata.source);
      }
      if (!options.dryRun) {
        // For multiple chunks from same file, delete existing chunks first
        // NOTE(review): a file that now yields a single chunk skips this cleanup, so
        // chunks from an earlier, longer version may linger - confirm this is intended.
        if (documents.length > 1) {
          const sources = new Set(documents.map((document) => document.metadata.source));
          const filterExpression = Array.from(
            sources,
            (source) => `metadata/source eq ${toODataString(source)}`,
          ).join(' or ');
          // Awaited so the deletion cannot race with (and remove) the chunks added
          // below, and so a failed deletion surfaces through the pipeline's error handler.
          await vectorStoreService.deleteDocuments({ filter: { filterExpression } });
        }
        await vectorStoreService.addDocuments(documents);
      }
      return {
        status: 'added',
        documents,
      };
    }),
    filter((result) => Boolean(result)),
  );

  // Track indexing results for reporting: deleted file paths and added document IDs
  const indexingResults = {
    deleted: [],
    added: [],
  };

  // Execute pipeline: concat ensures deletions happen before additions
  // This subscription triggers lazy RxJS execution and tracks all results
  concat(delete$, upsert$).subscribe({
    next: (result) => {
      if (result.status === 'deleted') {
        // Track deleted files by relative path
        indexingResults.deleted.push(...result.files.map((file) => file.relativePath));
      } else if (result.status === 'added') {
        // Track added documents with source and ID (one file can produce multiple IDs)
        indexingResults.added.push(
          ...result.documents.map((document) => ({
            source: document.metadata.source,
            id: document.id,
          })),
        );
      }
    },
    error: (error) => {
      console.error(`❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
      process.exit(1);
    },
    complete: () => {
      // Pipeline completed - log results and exit
      console.log('🗂️ Indexing results:', indexingResults);
      console.log('✅ Embeddings generation completed!');
      process.exit(0);
    },
  });
}
196
+ //# sourceMappingURL=embed.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embed.js","sourceRoot":"","sources":["../../../src/bin/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,UAAU,MAAM,YAAY,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,MAAM,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,gBAAgB,CAAC;AAExF,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAC/E,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAC1E,OAAO,EAAE,gBAAgB,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAEpF,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,8BAA8B,EAAE,MAAM,2BAA2B,CAAC;AAC3E,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAQpD,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAEhE;;;;GAIG;AACH,MAAM,aAAa,GAAG,CAAC,cAAc,EAAE,oBAAoB,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC;AAE3F;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,UAAgC;IAC1D,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,GAAG,UAAU,CAAC;IAEhE,wEAAwE;IACxE,MAAM,kBAAkB,GAAG,SAAS,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IAC3F,IAAI,OAAO,CAAC,KAAK,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,8DAA8D,CAAC,CAAC;QAC5E,8EAA8E;QAC9E,MAAM,kBAAkB,CAAC,eAAe,CAAC;YACvC,MAAM,EAAE,EAAE,gBAAgB,EAAE,uBAAuB,EAAE;SACtD,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;IACrD,CAAC;IAED,+CAA+C;IAC/C,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAEhE,0EAA0E;IAC1E,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE;QACnB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,IAAI,CAAC,YAAY,CAAC,CAAC;QAC5B,CAAC;QAED,uDAAuD;QACvD,8FAA8F;QAC9F,wFAAwF;QACxF,oDAAoD;QACpD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,EAAE,MAAM,IAAI,aAAa,CAAC;QAErD,OAAO,IAAI,CACT,YAAY,CAAC,YAAY,EAAE;YACzB,MAAM;YACN,SAAS,EAAE,IAAI;YACf,SAAS,EAAE,IAAI;YACf,QAAQ,EAAE,IAAI;SACf,CAAC,CACH,CAAC,IAAI;QACJ,0DAA0D;QAC1D,QAAQ,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,EACvC,SAAS,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACjC,wEAAwE;QA
CxE,WAAW,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAChC,CAAC;IACJ,CAAC,CAAC,EAAE,CAAC;IAEL,qEAAqE;IACrE,MAAM,mBAAmB,GAAG,MAAM,CAAC,KAAK,EAAE,QAAQ,IAAI;QACpD,SAAS;QACT,UAAU;QACV,SAAS;QACT,UAAU;KACX,CAAC;IAEF,qEAAqE;IACrE,MAAM,eAAe,GAAG,MAAM,CAAC,IAAI,CACjC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACX,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;QAClC,MAAM,WAAW,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE9E,OAAO;YACL,IAAI,EAAE,QAAQ;YACd,MAAM;YACN,WAAW;YACX,YAAY;SACb,CAAC;IACJ,CAAC,CAAC,EACF,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACd,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,YAAY,EAAE,mBAAmB,CAAC,CAAC;QACnE,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;IAC5B,CAAC,CAAC;IACF,yEAAyE;IACzE,WAAW,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAChC,CAAC;IAEF,sEAAsE;IACtE,MAAM,aAAa,GAAG,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC;IAExF,4BAA4B;IAC5B,MAAM,OAAO,GAAG,8BAA8B,CAAC,aAAa,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IAElF,kCAAkC;IAClC,MAAM,WAAW,GAAG,eAAe,CAAC,IAAI,CACtC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,KAAK,KAAK,IAAI,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC;IACrE,gDAAgD;IAChD,WAAW,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAChC,CAAC;IAEF,MAAM,SAAS,GAAG,CAAC,IAAmB,EAAW,EAAE;QACjD,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,KAAK,EAAE,WAAW,IAAI,EAAE,CAAC,CAAC;QAC/E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC,CAAC;IAEF,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAChC,MAAM,CAAC,SAAS,CAAC,EACjB,GAAG,CAAC,CAAC,IAAI,EAAiB,EAAE;QAC1B,MAAM,QAAQ,GAAwB;YACpC,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,YAAY,CAAC;YACtC,WAAW,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC;YAC5C,QAAQ,EAAE;gBACR,MAAM,EAAE,IAAI,CAAC,YAAY;gBACzB,IAAI,EAAE,KAAK;aACZ;SACF,CAAC;QACF,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC;IACxD,CAAC,CAAC,CACH,CAAC;IAEF,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAChC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAC5B,MAAM,CAAC,
CAAC,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAC3C,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;QACtB,MAAM,SAAS,GAAG,MAAM,iBAAiB,CAAC,IAAI,CAAC,CAAC;QAChD,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC;IAC5C,CAAC,CAAC,CACH,CAAC;IAEF,MAAM,WAAW,GAAG,WAAW,CAAC,IAAI,CAClC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAC5B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAC7C,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACX,MAAM,SAAS,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;QAC/C,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC;IAC5C,CAAC,CAAC,CACH,CAAC;IAEF,8BAA8B;IAC9B,MAAM,cAAc,GAAG,aAAa,CAAC,KAAK,CAAC,SAAS,EAAE,SAAS,EAAE,WAAW,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IAE7F,sBAAsB;IACtB,MAAM,gBAAgB,GAAG,SAAS,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,EAAE,OAAO,CAAC,yBAAyB,CAAC,CAAC;IAClG,MAAM,eAAe,GAAG,cAAc,CAAC,IAAI,CACzC,QAAQ,CAAC,CAAC,SAAS,EAAE,EAAE,CACrB,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAClB,QAAQ,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;QAC1B,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC5D,MAAM,UAAU,GAAG,MAAM,gBAAgB;aACtC,UAAU,CAAC,QAAQ,CAAC,WAAW,CAAC;aAChC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;YACf,OAAO,CAAC,KAAK,CACX,YAAY,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CACvE,CAAC;YACF,OAAO,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;YACpC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;QACL,MAAM,QAAQ,GAAG,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC;QACjE,OAAO,EAAE,GAAG,QAAQ,EAAE,QAAQ,EAAE,CAAC;IACnC,CAAC,CAAC,EACF,OAAO,EAAE,CACV,CACF,CACF,CAAC;IAEF,sBAAsB;IACtB,MAAM,OAAO,GAAG,eAAe,CAAC,IAAI,CAClC,QAAQ,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE;QAC3B,MAAM,kBAAkB,GAAG,SAAS,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;QAC3F,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,OAAO,CAAC,GAAG,CAAC,iBAAiB,QAAQ,CAAC,EAAE,mBAAmB,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACzF,CAAC;QACD,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YACpB,mEAAmE;YACnE,IAAI,
SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,SAAS;qBACtB,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC;qBAC3C,MAAM,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,IAAI,GAAG,EAAU,CAAC,CAAC;gBAE/D,MAAM,gBAAgB,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC;qBACzC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,uBAAuB,MAAM,GAAG,CAAC;qBACjD,IAAI,CAAC,MAAM,CAAC,CAAC;gBAEhB,iFAAiF;gBACjF,kBAAkB,CAAC,eAAe,CAAC,EAAE,MAAM,EAAE,EAAE,gBAAgB,EAAE,EAAE,CAAC,CAAC;YACvE,CAAC;YACD,MAAM,kBAAkB,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QACnD,CAAC;QACD,OAAO;YACL,MAAM,EAAE,OAAO;YACf,SAAS;SACiB,CAAC;IAC/B,CAAC,CAAC,EACF,MAAM,CAAC,CAAC,MAAM,EAAqC,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CACvE,CAAC;IAEF,mBAAmB;IACnB,kFAAkF;IAClF,MAAM,eAAe,GAAmE;QACtF,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,EAAE;KACV,CAAC;IAEF,qEAAqE;IACrE,wEAAwE;IACxE,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,SAAS,CAAC;QACjC,IAAI,EAAE,CAAC,MAAM,EAAE,EAAE;YACf,uCAAuC;YACvC,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAChC,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;YACjF,CAAC;YACD,+EAA+E;iBAC1E,IAAI,MAAM,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;gBACnC,eAAe,CAAC,KAAK,CAAC,IAAI,CACxB,GAAG,MAAM,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;oBACrC,MAAM,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM;oBAChC,EAAE,EAAE,QAAQ,CAAC,EAAE;iBAChB,CAAC,CAAC,CACJ,CAAC;YACJ,CAAC;QACH,CAAC;QACD,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE;YACf,OAAO,CAAC,KAAK,CAAC,YAAY,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;YACtF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,QAAQ,EAAE,GAAG,EAAE;YACb,4CAA4C;YAC5C,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE,eAAe,CAAC,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;KACF,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,40 @@
1
+ import { concat } from 'rxjs';
2
/**
 * Runs the indexing pipeline and reports what was deleted and added.
 *
 * Both streams are joined with `concat`, so the second stream is subscribed
 * only after the first one completes. Subscribing is what starts the work.
 * The function itself returns nothing: the outcome is logged and the process
 * is terminated with exit code 0 on completion or 1 on error.
 *
 * @internal
 * @param deleteRemovedFiles$ - Stream whose emissions with
 *   `status === 'deleted'` carry a `files` array; each file's `relativePath`
 *   is recorded.
 * @param updateVectorStore$ - Stream whose emissions with
 *   `status === 'added'` carry a `documents` array; each document's
 *   `metadata.source` and `id` are recorded.
 * @returns {void}
 */
export function executePipeline(deleteRemovedFiles$, updateVectorStore$) {
  // Track indexing results for reporting: deleted file paths and added document IDs
  const indexingResults = {
    deleted: [],
    added: [],
  };

  concat(deleteRemovedFiles$, updateVectorStore$).subscribe({
    next: (result) => {
      if (result.status === 'deleted') {
        // Append one entry at a time. `push(...hugeArray)` passes every
        // element as a call argument and throws a RangeError once a batch
        // grows beyond the engine's argument limit.
        for (const file of result.files) {
          indexingResults.deleted.push(file.relativePath);
        }
      } else if (result.status === 'added') {
        // One file can produce several documents, so keep source and ID together.
        for (const document of result.documents) {
          indexingResults.added.push({
            source: document.metadata.source,
            id: document.id,
          });
        }
      }
      // Emissions with any other status are ignored.
    },
    error: (error) => {
      console.error(`❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
      process.exit(1);
    },
    complete: () => {
      // Pipeline completed - log results and exit
      console.log('🗂️ Indexing results:', indexingResults);
      console.log('✅ Embeddings generation completed!');
      process.exit(0);
    },
  });
}
40
+ //# sourceMappingURL=execute-pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"execute-pipeline.js","sourceRoot":"","sources":["../../../src/bin/execute-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,CAAC;AAI9B;;;GAGG;AACH,MAAM,UAAU,eAAe,CAC7B,mBAAyD,EACzD,kBAAuD;IAEvD,kFAAkF;IAClF,MAAM,eAAe,GAAmE;QACtF,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,EAAE;KACV,CAAC;IAEF,qEAAqE;IACrE,wEAAwE;IACxE,MAAM,CAAC,mBAAmB,EAAE,kBAAkB,CAAC,CAAC,SAAS,CAAC;QACxD,IAAI,EAAE,CAAC,MAAM,EAAE,EAAE;YACf,uCAAuC;YACvC,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAChC,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;YACjF,CAAC;YACD,+EAA+E;iBAC1E,IAAI,MAAM,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;gBACnC,eAAe,CAAC,KAAK,CAAC,IAAI,CACxB,GAAG,MAAM,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;oBACrC,MAAM,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM;oBAChC,EAAE,EAAE,QAAQ,CAAC,EAAE;iBAChB,CAAC,CAAC,CACJ,CAAC;YACJ,CAAC;QACH,CAAC;QACD,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE;YACf,OAAO,CAAC,KAAK,CAAC,YAAY,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;YACtF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,QAAQ,EAAE,GAAG,EAAE;YACb,4CAA4C;YAC5C,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE,eAAe,CAAC,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;KACF,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,22 @@
1
+ import { globbyStream } from 'globby';
2
+ import { from, mergeMap, concatMap, shareReplay } from 'rxjs';
3
+ import { getFileStatus } from '../utils/git/index.js';
4
/**
 * Builds the stream of files to process.
 *
 * When `options.diff` is truthy the supplied `changedFiles` are emitted as-is.
 * Otherwise the glob patterns are expanded and every matched path is passed
 * to `getFileStatus`, whose result is flattened into individual emissions.
 *
 * @internal
 * @param options - Command options; only `diff` is read here.
 * @param changedFiles - Files to emit in diff mode.
 * @param filePatterns - Glob patterns used when not in diff mode.
 * @returns Stream of file entries.
 */
export function createFileStream(options, changedFiles, filePatterns) {
  if (!options.diff) {
    const globOptions = {
      onlyFiles: true,
      gitignore: true,
      absolute: true,
    };
    const matchedPaths$ = from(globbyStream(filePatterns, globOptions));
    return matchedPaths$.pipe(
      // Look up the status of each path concurrently
      mergeMap((path) => getFileStatus(path)),
      // Flatten each status result into individual emissions
      concatMap((files) => from(files)),
      // Share stream for multiple subscribers (removedFiles$ and indexFiles$)
      shareReplay({ refCount: true }),
    );
  }
  return from(changedFiles);
}
22
+ //# sourceMappingURL=file-stream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"file-stream.js","sourceRoot":"","sources":["../../../src/bin/file-stream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,MAAM,CAAC;AAE9D,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAItD;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAC9B,OAAuB,EACvB,YAA2B,EAC3B,YAAsB;IAEtB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;QACjB,OAAO,IAAI,CAAC,YAAY,CAAC,CAAC;IAC5B,CAAC;IAED,OAAO,IAAI,CACT,YAAY,CAAC,YAAY,EAAE;QACzB,SAAS,EAAE,IAAI;QACf,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,IAAI;KACf,CAAC,CACH,CAAC,IAAI;IACJ,0DAA0D;IAC1D,QAAQ,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,EACvC,SAAS,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACjC,wEAAwE;IACxE,WAAW,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAChC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,29 @@
1
+ import { getChangedFiles, getGitStatus } from '../utils/git/index.js';
2
/**
 * Resolves the list of changed files for diff-based processing.
 *
 * Logs the current git status, then asks `getChangedFiles` for the files
 * changed relative to `options.baseRef`. Calls `process.exit(0)` when nothing
 * changed and `process.exit(1)` when any step throws.
 *
 * @internal
 * @param options - Command options; `diff` and `baseRef` are forwarded to `getChangedFiles`.
 * @returns Promise resolving to the changed files.
 */
export async function getDiff(options) {
  try {
    // Informational output only; the status does not influence the result
    const status = await getGitStatus();
    console.log(`🔍 Git status: ${status.branch}@${status.commit}`);
    console.log(`📊 Changes: ${status.stagedFiles} staged, ${status.unstagedFiles} unstaged`);

    const { diff, baseRef } = options;
    const changedFiles = await getChangedFiles({ diff, baseRef });
    const nothingChanged = changedFiles.length === 0;
    if (nothingChanged) {
      console.log('✅ No changed files match the provided patterns. Nothing to process.');
      process.exit(0);
    }

    console.log(`📝 Found ${changedFiles.length} changed files matching patterns`);
    return changedFiles;
  } catch (error) {
    const reason = error instanceof Error ? error.message : 'Unknown error';
    console.error(`❌ Git diff error: ${reason}`);
    process.exit(1);
  }
}
29
+ //# sourceMappingURL=get-diff.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"get-diff.js","sourceRoot":"","sources":["../../../src/bin/get-diff.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAGtE;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,OAAuB;IACnD,IAAI,CAAC;QACH,kDAAkD;QAClD,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,kBAAkB,SAAS,CAAC,MAAM,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;QACtE,OAAO,CAAC,GAAG,CAAC,eAAe,SAAS,CAAC,WAAW,YAAY,SAAS,CAAC,aAAa,WAAW,CAAC,CAAC;QAEhG,iEAAiE;QACjE,MAAM,YAAY,GAAG,MAAM,eAAe,CAAC;YACzC,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,OAAO,EAAE,OAAO,CAAC,OAAO;SACzB,CAAC,CAAC;QAEH,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC9B,OAAO,CAAC,GAAG,CAAC,qEAAqE,CAAC,CAAC;YACnF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,YAAY,YAAY,CAAC,MAAM,kCAAkC,CAAC,CAAC;QAC/E,OAAO,YAAY,CAAC;IACtB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QAC/F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC"}
@@ -0,0 +1,2 @@
1
+ export { embed } from './embed.js';
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/bin/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/bin/types.ts"],"names":[],"mappings":""}