@equinor/fusion-framework-cli-plugin-ai-index 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +114 -0
  4. package/dist/esm/bin/apply-metadata.js +63 -0
  5. package/dist/esm/bin/apply-metadata.js.map +1 -0
  6. package/dist/esm/bin/delete-removed-files.js +36 -0
  7. package/dist/esm/bin/delete-removed-files.js.map +1 -0
  8. package/dist/esm/bin/embed.js +196 -0
  9. package/dist/esm/bin/embed.js.map +1 -0
  10. package/dist/esm/bin/execute-pipeline.js +40 -0
  11. package/dist/esm/bin/execute-pipeline.js.map +1 -0
  12. package/dist/esm/bin/file-stream.js +22 -0
  13. package/dist/esm/bin/file-stream.js.map +1 -0
  14. package/dist/esm/bin/get-diff.js +29 -0
  15. package/dist/esm/bin/get-diff.js.map +1 -0
  16. package/dist/esm/bin/index.js +2 -0
  17. package/dist/esm/bin/index.js.map +1 -0
  18. package/dist/esm/bin/types.js +2 -0
  19. package/dist/esm/bin/types.js.map +1 -0
  20. package/dist/esm/command.js +82 -0
  21. package/dist/esm/command.js.map +1 -0
  22. package/dist/esm/command.options.js +48 -0
  23. package/dist/esm/command.options.js.map +1 -0
  24. package/dist/esm/config.js +2 -0
  25. package/dist/esm/config.js.map +1 -0
  26. package/dist/esm/index.js +13 -0
  27. package/dist/esm/index.js.map +1 -0
  28. package/dist/esm/utils/generate-chunk-id.js +18 -0
  29. package/dist/esm/utils/generate-chunk-id.js.map +1 -0
  30. package/dist/esm/utils/git/file-changes.js +196 -0
  31. package/dist/esm/utils/git/file-changes.js.map +1 -0
  32. package/dist/esm/utils/git/git-client.js +39 -0
  33. package/dist/esm/utils/git/git-client.js.map +1 -0
  34. package/dist/esm/utils/git/index.js +9 -0
  35. package/dist/esm/utils/git/index.js.map +1 -0
  36. package/dist/esm/utils/git/metadata.js +41 -0
  37. package/dist/esm/utils/git/metadata.js.map +1 -0
  38. package/dist/esm/utils/git/status.js +34 -0
  39. package/dist/esm/utils/git/status.js.map +1 -0
  40. package/dist/esm/utils/git/types.js +2 -0
  41. package/dist/esm/utils/git/types.js.map +1 -0
  42. package/dist/esm/utils/markdown/index.js +3 -0
  43. package/dist/esm/utils/markdown/index.js.map +1 -0
  44. package/dist/esm/utils/markdown/parser.js +72 -0
  45. package/dist/esm/utils/markdown/parser.js.map +1 -0
  46. package/dist/esm/utils/markdown/types.js +2 -0
  47. package/dist/esm/utils/markdown/types.js.map +1 -0
  48. package/dist/esm/utils/package-resolver.js +40 -0
  49. package/dist/esm/utils/package-resolver.js.map +1 -0
  50. package/dist/esm/utils/ts-doc/constants.js +13 -0
  51. package/dist/esm/utils/ts-doc/constants.js.map +1 -0
  52. package/dist/esm/utils/ts-doc/extractors.js +175 -0
  53. package/dist/esm/utils/ts-doc/extractors.js.map +1 -0
  54. package/dist/esm/utils/ts-doc/index.js +3 -0
  55. package/dist/esm/utils/ts-doc/index.js.map +1 -0
  56. package/dist/esm/utils/ts-doc/parser.js +37 -0
  57. package/dist/esm/utils/ts-doc/parser.js.map +1 -0
  58. package/dist/esm/utils/ts-doc/types.js +2 -0
  59. package/dist/esm/utils/ts-doc/types.js.map +1 -0
  60. package/dist/esm/utils/types.js +2 -0
  61. package/dist/esm/utils/types.js.map +1 -0
  62. package/dist/esm/version.js +3 -0
  63. package/dist/esm/version.js.map +1 -0
  64. package/dist/tsconfig.tsbuildinfo +1 -0
  65. package/dist/types/bin/apply-metadata.d.ts +1 -0
  66. package/dist/types/bin/delete-removed-files.d.ts +1 -0
  67. package/dist/types/bin/embed.d.ts +1 -0
  68. package/dist/types/bin/execute-pipeline.d.ts +1 -0
  69. package/dist/types/bin/file-stream.d.ts +1 -0
  70. package/dist/types/bin/get-diff.d.ts +1 -0
  71. package/dist/types/bin/index.d.ts +1 -0
  72. package/dist/types/bin/types.d.ts +1 -0
  73. package/dist/types/command.d.ts +2 -0
  74. package/dist/types/command.options.d.ts +62 -0
  75. package/dist/types/config.d.ts +33 -0
  76. package/dist/types/index.d.ts +8 -0
  77. package/dist/types/utils/generate-chunk-id.d.ts +8 -0
  78. package/dist/types/utils/git/file-changes.d.ts +21 -0
  79. package/dist/types/utils/git/git-client.d.ts +17 -0
  80. package/dist/types/utils/git/index.d.ts +5 -0
  81. package/dist/types/utils/git/metadata.d.ts +7 -0
  82. package/dist/types/utils/git/status.d.ts +12 -0
  83. package/dist/types/utils/git/types.d.ts +33 -0
  84. package/dist/types/utils/markdown/index.d.ts +2 -0
  85. package/dist/types/utils/markdown/parser.d.ts +21 -0
  86. package/dist/types/utils/markdown/types.d.ts +11 -0
  87. package/dist/types/utils/package-resolver.d.ts +14 -0
  88. package/dist/types/utils/ts-doc/constants.d.ts +5 -0
  89. package/dist/types/utils/ts-doc/extractors.d.ts +28 -0
  90. package/dist/types/utils/ts-doc/index.d.ts +2 -0
  91. package/dist/types/utils/ts-doc/parser.d.ts +23 -0
  92. package/dist/types/utils/ts-doc/types.d.ts +20 -0
  93. package/dist/types/utils/types.d.ts +17 -0
  94. package/dist/types/version.d.ts +1 -0
  95. package/package.json +72 -0
  96. package/src/bin/apply-metadata.ts +77 -0
  97. package/src/bin/delete-removed-files.ts +49 -0
  98. package/src/bin/embed.ts +262 -0
  99. package/src/bin/execute-pipeline.ts +48 -0
  100. package/src/bin/file-stream.ts +34 -0
  101. package/src/bin/get-diff.ts +33 -0
  102. package/src/bin/index.ts +1 -0
  103. package/src/bin/types.ts +48 -0
  104. package/src/command.options.ts +58 -0
  105. package/src/command.ts +100 -0
  106. package/src/config.ts +39 -0
  107. package/src/index.ts +19 -0
  108. package/src/utils/generate-chunk-id.ts +17 -0
  109. package/src/utils/git/file-changes.ts +213 -0
  110. package/src/utils/git/git-client.ts +43 -0
  111. package/src/utils/git/index.ts +19 -0
  112. package/src/utils/git/metadata.ts +47 -0
  113. package/src/utils/git/status.ts +48 -0
  114. package/src/utils/git/types.ts +36 -0
  115. package/src/utils/markdown/index.ts +5 -0
  116. package/src/utils/markdown/parser.ts +92 -0
  117. package/src/utils/markdown/types.ts +20 -0
  118. package/src/utils/package-resolver.ts +44 -0
  119. package/src/utils/ts-doc/constants.ts +13 -0
  120. package/src/utils/ts-doc/extractors.ts +246 -0
  121. package/src/utils/ts-doc/index.ts +5 -0
  122. package/src/utils/ts-doc/parser.ts +51 -0
  123. package/src/utils/ts-doc/types.ts +26 -0
  124. package/src/utils/types.ts +18 -0
  125. package/src/version.ts +2 -0
  126. package/tsconfig.json +27 -0
  127. package/vitest.config.ts +14 -0
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export { embed } from './embed.js';
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,2 @@
1
+ export declare const command: import("commander").Command;
2
+ export default command;
@@ -0,0 +1,62 @@
1
+ import { z } from 'zod';
2
+ /**
3
+ * Zod schema for validating command options for the embeddings command.
4
+ *
5
+ * This schema extends the base AI options schema with embeddings-specific options,
6
+ * ensuring type safety and runtime validation of command arguments.
7
+ *
8
+ * Note: Some optional AI options become required for the embeddings command
9
+ * (openaiEmbeddingDeployment, azureSearchEndpoint, azureSearchApiKey, azureSearchIndexName)
10
+ * because the command uses withAiOptions with includeEmbedding and includeSearch set to true.
11
+ */
12
+ export declare const CommandOptionsSchema: z.ZodObject<{
13
+ openaiApiKey: z.ZodString;
14
+ openaiApiVersion: z.ZodString;
15
+ openaiInstance: z.ZodString;
16
+ openaiChatDeployment: z.ZodOptional<z.ZodString>;
17
+ } & {
18
+ openaiEmbeddingDeployment: z.ZodString;
19
+ azureSearchEndpoint: z.ZodString;
20
+ azureSearchApiKey: z.ZodString;
21
+ azureSearchIndexName: z.ZodString;
22
+ dryRun: z.ZodBoolean;
23
+ config: z.ZodString;
24
+ diff: z.ZodBoolean;
25
+ baseRef: z.ZodOptional<z.ZodString>;
26
+ clean: z.ZodBoolean;
27
+ }, "strip", z.ZodTypeAny, {
28
+ openaiEmbeddingDeployment: string;
29
+ azureSearchEndpoint: string;
30
+ azureSearchApiKey: string;
31
+ azureSearchIndexName: string;
32
+ dryRun: boolean;
33
+ config: string;
34
+ diff: boolean;
35
+ clean: boolean;
36
+ openaiApiKey: string;
37
+ openaiApiVersion: string;
38
+ openaiInstance: string;
39
+ baseRef?: string | undefined;
40
+ openaiChatDeployment?: string | undefined;
41
+ }, {
42
+ openaiEmbeddingDeployment: string;
43
+ azureSearchEndpoint: string;
44
+ azureSearchApiKey: string;
45
+ azureSearchIndexName: string;
46
+ dryRun: boolean;
47
+ config: string;
48
+ diff: boolean;
49
+ clean: boolean;
50
+ openaiApiKey: string;
51
+ openaiApiVersion: string;
52
+ openaiInstance: string;
53
+ baseRef?: string | undefined;
54
+ openaiChatDeployment?: string | undefined;
55
+ }>;
56
+ /**
57
+ * Type representing the validated command options.
58
+ *
59
+ * This type is inferred from the Zod schema and should be used throughout the command
60
+ * to ensure type safety and consistency with the schema.
61
+ */
62
+ export type CommandOptions = z.infer<typeof CommandOptionsSchema>;
@@ -0,0 +1,33 @@
1
+ import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
2
+ import type { FusionAIConfig } from '@equinor/fusion-framework-cli-plugin-ai-base';
3
+ /**
4
+ * Index-specific configuration for Fusion AI operations
5
+ */
6
+ export interface IndexConfig {
7
+ patterns?: string[];
8
+ /** Files will be processed as is, without any chunking or transformation */
9
+ rawPatterns?: string[];
10
+ /** Globby patterns to ignore; only used when paths are provided to the command */
11
+ ignore?: string[];
12
+ /** Metadata processing configuration */
13
+ metadata?: {
14
+ /** Automatically resolve package information from source file paths */
15
+ resolvePackage?: boolean;
16
+ resolveGit?: boolean;
17
+ /** Custom processor that transforms a document's metadata attributes before embedding */
18
+ attributeProcessor?: (metadata: Record<string, unknown>, document: VectorStoreDocument) => Record<string, unknown>;
19
+ };
20
+ /** Embedding generation configuration */
21
+ embedding?: {
22
+ /** Size of text chunks for embedding */
23
+ chunkSize?: number;
24
+ /** Overlap between chunks */
25
+ chunkOverlap?: number;
26
+ };
27
+ }
28
+ /**
29
+ * Extended Fusion AI configuration with index-specific settings
30
+ */
31
+ export interface FusionAIConfigWithIndex extends FusionAIConfig {
32
+ index?: IndexConfig;
33
+ }
@@ -0,0 +1,8 @@
1
+ import type { Command } from 'commander';
2
+ /**
3
+ * Registers the AI index plugin command with the CLI program
4
+ * @param program - The Commander program instance to register commands with
5
+ */
6
+ export declare function registerAiPlugin(program: Command): void;
7
+ export default registerAiPlugin;
8
+ export { configureFusionAI, type FusionAIConfig, } from '@equinor/fusion-framework-cli-plugin-ai-base';
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Generates a unique identifier for a document chunk based on file path
3
+ * Creates a deterministic, URL-safe hash from the file path for validation and checks
4
+ * @param filePath - The file path to generate an ID from
5
+ * @param chunkIndex - Optional chunk index to append for multi-chunk documents
6
+ * @returns A base64-encoded hash of the file path, optionally suffixed with chunk index
7
+ */
8
+ export declare const generateChunkId: (filePath: string, chunkIndex?: number) => string;
@@ -0,0 +1,21 @@
1
+ import type { ChangedFile, GitDiffOptions } from './types.js';
2
+ /**
3
+ * Get list of changed files using git diff with status
4
+ * @param options - Git diff configuration options
5
+ * @returns Array of changed files with their status
6
+ */
7
+ export declare const getChangedFiles: (options: GitDiffOptions) => Promise<ChangedFile[]>;
8
+ /**
9
+ * Determine the git status of a file, including handling renames
10
+ * Returns an array of ChangedFile objects - if the file was renamed, returns both old and new paths
11
+ * @param filePath - Absolute file path to check
12
+ * @returns Promise resolving to array of changed files (1 or 2 items if renamed)
13
+ */
14
+ export declare const getFileStatus: (filePath: string) => Promise<ChangedFile[]>;
15
+ /**
16
+ * Check if a file path matches any of the changed files
17
+ * @param filePath - File path to check
18
+ * @param changedFiles - Array of changed file objects
19
+ * @returns True if file has changed
20
+ */
21
+ export declare const isFileChanged: (filePath: string, changedFiles: ChangedFile[]) => boolean;
@@ -0,0 +1,17 @@
1
+ import { type SimpleGit } from 'simple-git';
2
+ /**
3
+ * Resolve the project root (git repository root) for a given file path
4
+ * @param filePath - File path to resolve from
5
+ * @returns Project root path or undefined if not in a git repository
6
+ */
7
+ export declare const resolveProjectRoot: (filePath: string) => string | undefined;
8
+ /**
9
+ * Get or create a SimpleGit instance for a given file path
10
+ * Uses caching to avoid creating multiple instances for the same repository
11
+ * @param filePath - File path to get git instance for
12
+ * @returns Git instance and repository path, or undefined if not in a git repository
13
+ */
14
+ export declare const getGit: (filePath: string) => {
15
+ git: SimpleGit | undefined;
16
+ gitRepoPath: string;
17
+ } | undefined;
@@ -0,0 +1,5 @@
1
+ export type { GitMetadata, GitDiffOptions, FileChangeStatus, ChangedFile, } from './types.js';
2
+ export { resolveProjectRoot, getGit } from './git-client.js';
3
+ export { extractGitMetadata } from './metadata.js';
4
+ export { getChangedFiles, getFileStatus, isFileChanged } from './file-changes.js';
5
+ export { getGitStatus } from './status.js';
@@ -0,0 +1,7 @@
1
+ import type { GitMetadata } from './types.js';
2
+ /**
3
+ * Extract git metadata for a file
4
+ * @param filePath - Absolute file path
5
+ * @returns Git metadata or undefined if not in a git repository
6
+ */
7
+ export declare const extractGitMetadata: (filePath: string) => Promise<GitMetadata | undefined>;
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Get git status information for debugging
3
+ * @param cwd - Working directory
4
+ * @returns Git status information
5
+ */
6
+ export declare const getGitStatus: (cwd?: string) => Promise<{
7
+ branch: string;
8
+ commit: string;
9
+ hasChanges: boolean;
10
+ stagedFiles: number;
11
+ unstagedFiles: number;
12
+ }>;
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Git metadata extracted from repository
3
+ */
4
+ export type GitMetadata = Partial<{
5
+ git_remote_url: string;
6
+ git_commit_hash: string;
7
+ git_commit_date: string;
8
+ git_link: string;
9
+ }>;
10
+ /**
11
+ * Git diff options for filtering changed files
12
+ */
13
+ export interface GitDiffOptions {
14
+ /** Enable diff-based file filtering */
15
+ diff: boolean;
16
+ /** Git reference to compare against (default: HEAD~1) */
17
+ baseRef?: string;
18
+ /** Working directory for git operations */
19
+ cwd?: string;
20
+ }
21
+ /**
22
+ * File change status
23
+ */
24
+ export type FileChangeStatus = 'new' | 'modified' | 'removed';
25
+ /**
26
+ * Changed file information
27
+ */
28
+ export interface ChangedFile {
29
+ /** Absolute file path */
30
+ filepath: string;
31
+ /** Change status: new, modified, or removed */
32
+ status: FileChangeStatus;
33
+ }
@@ -0,0 +1,2 @@
1
+ export type { MarkdownMetadata, MarkdownDocument } from './types.js';
2
+ export { isMarkdownFile, parseMarkdown, parseMarkdownFile } from './parser.js';
@@ -0,0 +1,21 @@
1
+ import type { SourceFile } from '../types.js';
2
+ import type { MarkdownDocument } from './types.js';
3
+ /**
4
+ * Check if a file is a markdown or MDX file
5
+ * @param filePath - File path to check
6
+ * @returns True if file has .md or .mdx extension
7
+ */
8
+ export declare const isMarkdownFile: (filePath: string) => boolean;
9
+ /**
10
+ * Parse markdown or MDX content into document chunks
11
+ * @param content - Markdown or MDX content string
12
+ * @param source - Source file path
13
+ * @returns Array of markdown documents
14
+ */
15
+ export declare const parseMarkdown: <T extends Record<string, unknown> = Record<string, unknown>>(content: string, source: string) => Promise<MarkdownDocument<T>[]>;
16
+ /**
17
+ * Parse a markdown or MDX file into document chunks
18
+ * @param file - Source file object
19
+ * @returns Array of markdown documents with root path metadata
20
+ */
21
+ export declare const parseMarkdownFile: <T extends Record<string, unknown> = Record<string, unknown>>(file: SourceFile) => Promise<MarkdownDocument<T>[]>;
@@ -0,0 +1,11 @@
1
+ import type { VectorStoreDocument, VectorStoreDocumentMetadata } from '@equinor/fusion-framework-module-ai/lib';
2
+ /**
3
+ * Markdown document metadata
4
+ */
5
+ export type MarkdownMetadata<T extends Record<string, unknown> = Record<string, unknown>> = VectorStoreDocumentMetadata<T & {
6
+ type: 'markdown';
7
+ }>;
8
+ /**
9
+ * Markdown document
10
+ */
11
+ export type MarkdownDocument<T extends Record<string, unknown> = Record<string, unknown>> = VectorStoreDocument<MarkdownMetadata<T>>;
@@ -0,0 +1,14 @@
1
+ import { type PackageJson } from 'read-package-up';
2
+ /**
3
+ * Resolves which package a file path belongs to.
4
+ * First checks the cache map, then uses read-package-up if no match found.
5
+ *
6
+ * @param filePath - Absolute or relative file path (e.g., '/path/to/packages/cli/src/index.ts')
7
+ * @returns Package.json if found, undefined otherwise
8
+ *
9
+ * @example
10
+ * ```ts
11
+ * const packageJson = await resolvePackage('/path/to/packages/cli/src/index.ts');
12
+ * ```
13
+ */
14
+ export declare function resolvePackage(filePath: string): Promise<PackageJson | undefined>;
@@ -0,0 +1,5 @@
1
+ import { SyntaxKind } from 'ts-morph';
2
+ /**
3
+ * Supported TSDoc node kinds for top-level processing
4
+ */
5
+ export declare const nodeKinds: SyntaxKind[];
@@ -0,0 +1,28 @@
1
+ import { type SourceFile as ProjectSourceFile, Node, type ClassDeclaration } from 'ts-morph';
2
+ import type { TypescriptDocument, ParseTsDocOptions } from './types.js';
3
+ /**
4
+ * Extracts a TSDoc document from a class node, including TSDoc, constructor, and public member signatures.
5
+ * @param classNode - The ClassDeclaration node to process.
6
+ * @param sourceFile - The source file containing the node.
7
+ * @param _options - Optional parsing configuration.
8
+ * @returns A TypeScript document with TSDoc and class interface, or null if no TSDoc is found.
9
+ */
10
+ export declare const extractDocumentFromClassNode: (classNode: ClassDeclaration, sourceFile: ProjectSourceFile, _options?: ParseTsDocOptions) => TypescriptDocument | null;
11
+ /**
12
+ * Extracts a TSDoc document from a single node.
13
+ * @param node - The TypeScript node to process.
14
+ * @param sourceFile - The source file containing the node.
15
+ * @param options - Optional parsing configuration.
16
+ * @param nodeOptions - Optional node-specific configuration (e.g., skipKindCheck for VariableStatement).
17
+ * @returns A TypeScript document with TSDoc metadata, or null if no TSDoc is found.
18
+ */
19
+ export declare const extractDocumentFromNode: (node: Node, sourceFile: ProjectSourceFile, options?: ParseTsDocOptions, nodeOptions?: {
20
+ skipKindCheck?: boolean;
21
+ }) => TypescriptDocument | null;
22
+ /**
23
+ * Processes a TypeScript source file to extract TSDoc documents.
24
+ * @param sourceFile - The source file to process.
25
+ * @param options - Optional parsing configuration.
26
+ * @returns An array of TypeScript documents with TSDoc metadata.
27
+ */
28
+ export declare const processSourceFile: (sourceFile: ProjectSourceFile, options?: ParseTsDocOptions) => TypescriptDocument[];
@@ -0,0 +1,2 @@
1
+ export type { TypescriptMetadata, TypescriptDocument, ParseTsDocOptions } from './types.js';
2
+ export { isTypescriptFile, parseTsDocSync, parseTsDocFromFileSync } from './parser.js';
@@ -0,0 +1,23 @@
1
+ import type { SourceFile } from '../types.js';
2
+ import type { TypescriptDocument, ParseTsDocOptions } from './types.js';
3
+ /**
4
+ * Checks if a file is a TypeScript or TSX file based on its extension.
5
+ * @param filePath - The path to the file.
6
+ * @returns True if the file ends with .ts or .tsx, false otherwise.
7
+ */
8
+ export declare const isTypescriptFile: (filePath: string) => boolean;
9
+ /**
10
+ * Parses TSDoc from a string of TypeScript code.
11
+ * @param content - The TypeScript code content.
12
+ * @param options - Optional parsing configuration.
13
+ * @returns An array of TypeScript documents with TSDoc metadata.
14
+ */
15
+ export declare const parseTsDocSync: (content: string, options?: ParseTsDocOptions) => TypescriptDocument[];
16
+ /**
17
+ * Parses TSDoc from a TypeScript file by path.
18
+ * @param file - The source file object.
19
+ * @param options - Optional parsing configuration.
20
+ * @returns An array of TypeScript documents with TSDoc metadata.
21
+ * @throws If the file is not a TypeScript file.
22
+ */
23
+ export declare const parseTsDocFromFileSync: (file: SourceFile, options?: ParseTsDocOptions) => TypescriptDocument[];
@@ -0,0 +1,20 @@
1
+ import type { VectorStoreDocument, VectorStoreDocumentMetadata } from '@equinor/fusion-framework-module-ai/lib';
2
+ /**
3
+ * TypeScript document metadata
4
+ */
5
+ export type TypescriptMetadata = VectorStoreDocumentMetadata<{
6
+ type: 'tsdoc';
7
+ ts_kind: string;
8
+ ts_name: string;
9
+ }>;
10
+ /**
11
+ * TypeScript document with TSDoc metadata
12
+ */
13
+ export type TypescriptDocument = VectorStoreDocument<TypescriptMetadata>;
14
+ /**
15
+ * Options for parsing TypeScript documents
16
+ */
17
+ export interface ParseTsDocOptions {
18
+ /** The project root path for generating relative paths */
19
+ projectRoot?: string;
20
+ }
@@ -0,0 +1,17 @@
1
+ /**
2
+ * File change status in git
3
+ */
4
+ export type FileStatus = 'new' | 'modified' | 'removed';
5
+ /**
6
+ * Source file information for processing
7
+ */
8
+ export type SourceFile = {
9
+ /** Absolute file path */
10
+ path: string;
11
+ /** Project root directory (git repository root) */
12
+ projectRoot?: string;
13
+ /** Relative path from project root */
14
+ relativePath?: string;
15
+ /** Git change status */
16
+ status: FileStatus;
17
+ };
@@ -0,0 +1 @@
1
+ export declare const version = "1.0.0";
package/package.json ADDED
@@ -0,0 +1,72 @@
1
+ {
2
+ "name": "@equinor/fusion-framework-cli-plugin-ai-index",
3
+ "version": "1.0.0",
4
+ "description": "AI indexing plugin for Fusion Framework CLI providing document embedding and chunking utilities",
5
+ "main": "dist/esm/index.js",
6
+ "type": "module",
7
+ "types": "dist/types/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "import": "./dist/esm/index.js",
11
+ "types": "./dist/types/index.d.ts"
12
+ }
13
+ },
14
+ "typesVersions": {
15
+ "*": {
16
+ ".": [
17
+ "dist/types/index.d.ts"
18
+ ]
19
+ }
20
+ },
21
+ "keywords": [
22
+ "fusion-framework",
23
+ "cli",
24
+ "plugin",
25
+ "llm",
26
+ "ai",
27
+ "index",
28
+ "embeddings"
29
+ ],
30
+ "author": "",
31
+ "license": "ISC",
32
+ "publishConfig": {
33
+ "access": "public"
34
+ },
35
+ "repository": {
36
+ "type": "git",
37
+ "url": "git+https://github.com/equinor/fusion-framework.git",
38
+ "directory": "packages/cli-plugins/ai-index"
39
+ },
40
+ "dependencies": {
41
+ "@langchain/textsplitters": "^1.0.0",
42
+ "commander": "^14.0.1",
43
+ "find-up": "^8.0.0",
44
+ "globby": "^15.0.0",
45
+ "gray-matter": "^4.0.3",
46
+ "multimatch": "^7.0.0",
47
+ "read-package-up": "^11.0.0",
48
+ "rxjs": "^7.8.1",
49
+ "simple-git": "^3.28.0",
50
+ "tree-sitter": "^0.25.0",
51
+ "tree-sitter-typescript": "^0.23.2",
52
+ "ts-morph": "^27.0.2",
53
+ "zod": "^3.23.8",
54
+ "@equinor/fusion-framework-module": "5.0.5",
55
+ "@equinor/fusion-framework-module-ai": "2.0.0",
56
+ "@equinor/fusion-framework-cli-plugin-ai-base": "1.0.0",
57
+ "@equinor/fusion-imports": "1.1.8"
58
+ },
59
+ "peerDependencies": {
60
+ "@equinor/fusion-framework-cli": "13.0.0"
61
+ },
62
+ "devDependencies": {
63
+ "typescript": "^5.8.2",
64
+ "vitest": "^3.2.4"
65
+ },
66
+ "scripts": {
67
+ "build": "tsc -b",
68
+ "build:types": "tsc -b",
69
+ "watch": "tsc -b --watch",
70
+ "test": "vitest"
71
+ }
72
+ }
@@ -0,0 +1,77 @@
1
+ import path from 'node:path';
2
+ import { from, mergeMap, map, toArray } from 'rxjs';
3
+ import type { Observable } from 'rxjs';
4
+ import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
5
+ import { extractGitMetadata } from '../utils/git/index.js';
6
+ import { resolvePackage } from '../utils/package-resolver.js';
7
+ import type { DocumentEntry } from './types.js';
8
+ import type { FusionAIConfigWithIndex } from '../config.js';
9
+
10
/**
 * Resolves package attributes (name, version, keywords) for a source file.
 *
 * Best-effort: when no package is found, or the lookup fails, an empty object
 * is returned so that existing attributes are never overwritten with
 * `undefined` values.
 *
 * @param sourcePath - Path of the file whose owning package should be resolved
 * @returns Package attributes, or an empty object when none could be resolved
 */
async function resolvePackageAttributes(sourcePath: string): Promise<Record<string, unknown>> {
  try {
    const pkg = await resolvePackage(sourcePath);
    if (!pkg) {
      return {};
    }
    return {
      pkg_name: pkg.name,
      pkg_version: pkg.version,
      pkg_keywords: pkg.keywords,
    };
  } catch {
    // Package resolution is optional enrichment; a failed lookup must not abort indexing.
    return {};
  }
}

/**
 * Creates a stream that applies metadata to documents.
 *
 * For every document of every incoming entry the stream:
 * 1. merges git metadata into `metadata.attributes` (unless `metadata.resolveGit` is `false`),
 * 2. merges package metadata into `metadata.attributes` (only when `metadata.resolvePackage` is enabled),
 * 3. runs the configured `metadata.attributeProcessor` over the resulting attributes.
 *
 * Documents without a `metadata.source` are passed through without git/package enrichment.
 *
 * @param document$ - Stream of document entries (one entry per processed file)
 * @param indexConfig - Index configuration controlling which metadata is resolved
 * @returns Stream emitting one array of enriched documents per incoming entry
 * @internal
 */
export function applyMetadata(
  document$: Observable<DocumentEntry>,
  indexConfig: FusionAIConfigWithIndex['index'],
): Observable<VectorStoreDocument[]> {
  // Package resolution is opt-in, git resolution is opt-out.
  const shouldResolvePackage = indexConfig?.metadata?.resolvePackage ?? false;
  const shouldResolveGit = indexConfig?.metadata?.resolveGit !== false;

  // Fall back to an identity processor when none is configured.
  const attributeProcessor =
    indexConfig?.metadata?.attributeProcessor ??
    ((attributes: Record<string, unknown>, _document: VectorStoreDocument) => attributes);

  return document$.pipe(
    mergeMap((entry) =>
      from(entry.documents).pipe(
        // Enrich the documents of one entry concurrently
        mergeMap(async (document): Promise<VectorStoreDocument> => {
          const { source } = document.metadata;
          // Only build the path when a source exists; joining a missing source would throw.
          const sourcePath = source
            ? path.join(document.metadata.rootPath ?? process.cwd(), source)
            : undefined;

          const gitMetadata =
            sourcePath && shouldResolveGit ? ((await extractGitMetadata(sourcePath)) ?? {}) : {};

          const packageMetadata =
            sourcePath && shouldResolvePackage ? await resolvePackageAttributes(sourcePath) : {};

          return {
            ...document,
            metadata: {
              ...document.metadata,
              attributes: {
                ...document.metadata.attributes,
                ...gitMetadata,
                ...packageMetadata,
              },
            },
          };
        }),
        // Apply custom attribute processor from config
        map((document: VectorStoreDocument) => ({
          ...document,
          metadata: {
            ...document.metadata,
            attributes: attributeProcessor(document.metadata.attributes ?? {}, document),
          },
        })),
        // Group back by file for batch deletion in next step
        toArray(),
      ),
    ),
  );
}
@@ -0,0 +1,49 @@
1
+ import { map, mergeMap, toArray, filter } from 'rxjs';
2
+ import type { Observable } from 'rxjs';
3
+ import type { FrameworkInstance } from '@equinor/fusion-framework-cli-plugin-ai-base';
4
+ import type { ProcessedFile, DeleteRemovedFilesResult } from './types.js';
5
+ import type { CommandOptions } from '../command.options.js';
6
+
7
/**
 * Escapes a value for use inside a single-quoted OData string literal.
 * OData represents a literal single quote by doubling it.
 */
const escapeODataString = (value: string): string => value.replace(/'/g, "''");

/**
 * Creates a stream that deletes removed files from the vector store.
 *
 * All removed files are collected and deleted with a single filter of the form
 * `metadata/source eq 'path1' or metadata/source eq 'path2'`. Entries without a
 * relative path are skipped because they cannot be matched against
 * `metadata/source`. Nothing is emitted when there is nothing to delete.
 *
 * When `options.dryRun` is set, the removals are logged but no delete request is sent.
 *
 * @param removedFiles$ - Stream of files that were removed
 * @param framework - Framework instance providing the AI search service
 * @param options - Command options (`dryRun`, `azureSearchIndexName`)
 * @returns Stream emitting at most one result describing the deleted files
 * @internal
 */
export function createDeleteRemovedFilesStream(
  removedFiles$: Observable<ProcessedFile>,
  framework: FrameworkInstance,
  options: CommandOptions,
): Observable<DeleteRemovedFilesResult> {
  return removedFiles$.pipe(
    toArray(),
    // Keep only entries that can be addressed by their relative path.
    map((files) =>
      files.filter(
        (file): file is ProcessedFile & { relativePath: string } =>
          typeof file.relativePath === 'string' && file.relativePath.length > 0,
      ),
    ),
    mergeMap(async (files) => {
      if (files.length === 0) {
        return undefined;
      }

      // Build OData filter: "metadata/source eq 'path1' or metadata/source eq 'path2'"
      // Paths are escaped so a quote in a file name cannot break or widen the filter.
      const filterExpression = files
        .map((file) => `metadata/source eq '${escapeODataString(file.relativePath)}'`)
        .join(' or ');

      for (const file of files) {
        console.log('Removing entry from vector store', file.relativePath);
      }

      if (!options.dryRun) {
        const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
        // Single batch deletion - one file can produce multiple document chunks
        await vectorStoreService.deleteDocuments({
          filter: { filterExpression },
        });
      }

      // Widen to the shape reported to callers (declaration instead of a type assertion).
      const deletedFiles: { relativePath: string }[] = files;
      return {
        status: 'deleted',
        files: deletedFiles,
      };
    }),
    filter((result): result is DeleteRemovedFilesResult => Boolean(result)),
  );
}