@equinor/fusion-framework-cli-plugin-ai-index 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +114 -0
  4. package/dist/esm/bin/apply-metadata.js +63 -0
  5. package/dist/esm/bin/apply-metadata.js.map +1 -0
  6. package/dist/esm/bin/delete-removed-files.js +36 -0
  7. package/dist/esm/bin/delete-removed-files.js.map +1 -0
  8. package/dist/esm/bin/embed.js +196 -0
  9. package/dist/esm/bin/embed.js.map +1 -0
  10. package/dist/esm/bin/execute-pipeline.js +40 -0
  11. package/dist/esm/bin/execute-pipeline.js.map +1 -0
  12. package/dist/esm/bin/file-stream.js +22 -0
  13. package/dist/esm/bin/file-stream.js.map +1 -0
  14. package/dist/esm/bin/get-diff.js +29 -0
  15. package/dist/esm/bin/get-diff.js.map +1 -0
  16. package/dist/esm/bin/index.js +2 -0
  17. package/dist/esm/bin/index.js.map +1 -0
  18. package/dist/esm/bin/types.js +2 -0
  19. package/dist/esm/bin/types.js.map +1 -0
  20. package/dist/esm/command.js +82 -0
  21. package/dist/esm/command.js.map +1 -0
  22. package/dist/esm/command.options.js +48 -0
  23. package/dist/esm/command.options.js.map +1 -0
  24. package/dist/esm/config.js +2 -0
  25. package/dist/esm/config.js.map +1 -0
  26. package/dist/esm/index.js +13 -0
  27. package/dist/esm/index.js.map +1 -0
  28. package/dist/esm/utils/generate-chunk-id.js +18 -0
  29. package/dist/esm/utils/generate-chunk-id.js.map +1 -0
  30. package/dist/esm/utils/git/file-changes.js +196 -0
  31. package/dist/esm/utils/git/file-changes.js.map +1 -0
  32. package/dist/esm/utils/git/git-client.js +39 -0
  33. package/dist/esm/utils/git/git-client.js.map +1 -0
  34. package/dist/esm/utils/git/index.js +9 -0
  35. package/dist/esm/utils/git/index.js.map +1 -0
  36. package/dist/esm/utils/git/metadata.js +41 -0
  37. package/dist/esm/utils/git/metadata.js.map +1 -0
  38. package/dist/esm/utils/git/status.js +34 -0
  39. package/dist/esm/utils/git/status.js.map +1 -0
  40. package/dist/esm/utils/git/types.js +2 -0
  41. package/dist/esm/utils/git/types.js.map +1 -0
  42. package/dist/esm/utils/markdown/index.js +3 -0
  43. package/dist/esm/utils/markdown/index.js.map +1 -0
  44. package/dist/esm/utils/markdown/parser.js +72 -0
  45. package/dist/esm/utils/markdown/parser.js.map +1 -0
  46. package/dist/esm/utils/markdown/types.js +2 -0
  47. package/dist/esm/utils/markdown/types.js.map +1 -0
  48. package/dist/esm/utils/package-resolver.js +40 -0
  49. package/dist/esm/utils/package-resolver.js.map +1 -0
  50. package/dist/esm/utils/ts-doc/constants.js +13 -0
  51. package/dist/esm/utils/ts-doc/constants.js.map +1 -0
  52. package/dist/esm/utils/ts-doc/extractors.js +175 -0
  53. package/dist/esm/utils/ts-doc/extractors.js.map +1 -0
  54. package/dist/esm/utils/ts-doc/index.js +3 -0
  55. package/dist/esm/utils/ts-doc/index.js.map +1 -0
  56. package/dist/esm/utils/ts-doc/parser.js +37 -0
  57. package/dist/esm/utils/ts-doc/parser.js.map +1 -0
  58. package/dist/esm/utils/ts-doc/types.js +2 -0
  59. package/dist/esm/utils/ts-doc/types.js.map +1 -0
  60. package/dist/esm/utils/types.js +2 -0
  61. package/dist/esm/utils/types.js.map +1 -0
  62. package/dist/esm/version.js +3 -0
  63. package/dist/esm/version.js.map +1 -0
  64. package/dist/tsconfig.tsbuildinfo +1 -0
  65. package/dist/types/bin/apply-metadata.d.ts +1 -0
  66. package/dist/types/bin/delete-removed-files.d.ts +1 -0
  67. package/dist/types/bin/embed.d.ts +1 -0
  68. package/dist/types/bin/execute-pipeline.d.ts +1 -0
  69. package/dist/types/bin/file-stream.d.ts +1 -0
  70. package/dist/types/bin/get-diff.d.ts +1 -0
  71. package/dist/types/bin/index.d.ts +1 -0
  72. package/dist/types/bin/types.d.ts +1 -0
  73. package/dist/types/command.d.ts +2 -0
  74. package/dist/types/command.options.d.ts +62 -0
  75. package/dist/types/config.d.ts +33 -0
  76. package/dist/types/index.d.ts +8 -0
  77. package/dist/types/utils/generate-chunk-id.d.ts +8 -0
  78. package/dist/types/utils/git/file-changes.d.ts +21 -0
  79. package/dist/types/utils/git/git-client.d.ts +17 -0
  80. package/dist/types/utils/git/index.d.ts +5 -0
  81. package/dist/types/utils/git/metadata.d.ts +7 -0
  82. package/dist/types/utils/git/status.d.ts +12 -0
  83. package/dist/types/utils/git/types.d.ts +33 -0
  84. package/dist/types/utils/markdown/index.d.ts +2 -0
  85. package/dist/types/utils/markdown/parser.d.ts +21 -0
  86. package/dist/types/utils/markdown/types.d.ts +11 -0
  87. package/dist/types/utils/package-resolver.d.ts +14 -0
  88. package/dist/types/utils/ts-doc/constants.d.ts +5 -0
  89. package/dist/types/utils/ts-doc/extractors.d.ts +28 -0
  90. package/dist/types/utils/ts-doc/index.d.ts +2 -0
  91. package/dist/types/utils/ts-doc/parser.d.ts +23 -0
  92. package/dist/types/utils/ts-doc/types.d.ts +20 -0
  93. package/dist/types/utils/types.d.ts +17 -0
  94. package/dist/types/version.d.ts +1 -0
  95. package/package.json +72 -0
  96. package/src/bin/apply-metadata.ts +77 -0
  97. package/src/bin/delete-removed-files.ts +49 -0
  98. package/src/bin/embed.ts +262 -0
  99. package/src/bin/execute-pipeline.ts +48 -0
  100. package/src/bin/file-stream.ts +34 -0
  101. package/src/bin/get-diff.ts +33 -0
  102. package/src/bin/index.ts +1 -0
  103. package/src/bin/types.ts +48 -0
  104. package/src/command.options.ts +58 -0
  105. package/src/command.ts +100 -0
  106. package/src/config.ts +39 -0
  107. package/src/index.ts +19 -0
  108. package/src/utils/generate-chunk-id.ts +17 -0
  109. package/src/utils/git/file-changes.ts +213 -0
  110. package/src/utils/git/git-client.ts +43 -0
  111. package/src/utils/git/index.ts +19 -0
  112. package/src/utils/git/metadata.ts +47 -0
  113. package/src/utils/git/status.ts +48 -0
  114. package/src/utils/git/types.ts +36 -0
  115. package/src/utils/markdown/index.ts +5 -0
  116. package/src/utils/markdown/parser.ts +92 -0
  117. package/src/utils/markdown/types.ts +20 -0
  118. package/src/utils/package-resolver.ts +44 -0
  119. package/src/utils/ts-doc/constants.ts +13 -0
  120. package/src/utils/ts-doc/extractors.ts +246 -0
  121. package/src/utils/ts-doc/index.ts +5 -0
  122. package/src/utils/ts-doc/parser.ts +51 -0
  123. package/src/utils/ts-doc/types.ts +26 -0
  124. package/src/utils/types.ts +18 -0
  125. package/src/version.ts +2 -0
  126. package/tsconfig.json +27 -0
  127. package/vitest.config.ts +14 -0
@@ -0,0 +1,213 @@
1
+ import { join, relative } from 'node:path';
2
+ import type { ChangedFile, FileChangeStatus, GitDiffOptions } from './types.js';
3
+ import { resolveProjectRoot, getGit } from './git-client.js';
4
+
/**
 * Convert one `git diff --name-status` line into the changed-file entries it describes.
 *
 * Fields are tab-separated, so the line is split on tabs rather than on arbitrary
 * whitespace; this keeps paths that contain spaces intact.
 *
 * @param line - A single non-empty output line, e.g. `M\tsrc/a.ts` or `R100\told.ts\tnew.ts`
 * @param projectRoot - Repository root the reported paths are relative to
 * @returns Zero, one or two entries (a rename yields the removed old path and the new path)
 */
const parseNameStatusLine = (line: string, projectRoot: string): ChangedFile[] => {
  const [code = '', firstPath, secondPath] = line.split('\t');

  // Renames are reported as "R<score>\t<old>\t<new>"
  if (/^R\d*$/.test(code)) {
    if (!firstPath || !secondPath) {
      return [];
    }
    return [
      { filepath: join(projectRoot, firstPath), status: 'removed' },
      { filepath: join(projectRoot, secondPath), status: 'new' },
    ];
  }

  if (!firstPath) {
    return [];
  }

  // join() collapses duplicate separators, so a root with a trailing slash
  // still produces a path that compares equal in isFileChanged
  const filepath = join(projectRoot, firstPath);
  switch (code) {
    case 'A':
      return [{ filepath, status: 'new' }];
    case 'M':
      return [{ filepath, status: 'modified' }];
    case 'D':
      return [{ filepath, status: 'removed' }];
    default:
      // Skip unknown statuses (C=copied, etc.)
      return [];
  }
};

/**
 * Get the list of files changed relative to a git reference, with their change status.
 *
 * Runs `git diff <baseRef> --name-status` and maps `A`/`M`/`D` to
 * `new`/`modified`/`removed`. A rename contributes two entries: the old path as
 * `removed` and the new path as `new`. Other status codes are ignored.
 *
 * @param options - Git diff configuration options (`baseRef` defaults to `HEAD~1`,
 *   `cwd` defaults to `process.cwd()`)
 * @returns Changed files with paths joined onto the repository root. The result is
 *   empty when `options.diff` is falsy, and also when the git command fails (a warning
 *   is printed); `isFileChanged` treats an empty list as "process all files".
 * @throws Error when `cwd` is not inside a git repository or no git client is available
 */
export const getChangedFiles = async (options: GitDiffOptions): Promise<ChangedFile[]> => {
  const { diff, baseRef = 'HEAD~1', cwd = process.cwd() } = options;

  if (!diff) {
    return [];
  }

  const projectRoot = resolveProjectRoot(cwd);
  if (!projectRoot) {
    throw new Error('Not in a git repository. Cannot use --diff option.');
  }

  const { git } = getGit(cwd) ?? {};
  if (!git) {
    throw new Error('Failed to initialize git client');
  }

  let diffResult: string;
  try {
    // Get changes since baseRef with status (A=added, M=modified, D=deleted)
    diffResult = await git.diff([`${baseRef}`, '--name-status']);
  } catch {
    // Handle case where baseRef doesn't exist (e.g., first commit)
    console.warn(`⚠️ Warning: Git reference '${baseRef}' not found. Processing all files.`);
    return [];
  }

  return diffResult
    .split('\n')
    .filter((line) => line.trim() !== '')
    .flatMap((line) => parseNameStatusLine(line, projectRoot));
};
79
+
/**
 * Determine the git status of a single file, including rename handling.
 *
 * Checks are ordered from cheap to expensive and stop at the first answer:
 * 1. Path is known to `git ls-files --error-unmatch` → `modified`.
 * 2. `git status --porcelain -- <path>` reports `??` (untracked) → `new`.
 * 3. Path is the target of a rename or copy in the full `git status --porcelain`
 *    output → rename: old path `removed` plus new path `new`; copy: new path `new`.
 * 4. `git log --all --follow` finds at least one commit for the path → `modified`,
 *    otherwise `new`.
 *
 * Every failure falls back to `new`; this function does not throw.
 *
 * NOTE(review): a staged rename also makes the new path visible to `ls-files`, so
 * step 1 may answer before step 3 is reached — confirm whether rename detection
 * should run first.
 *
 * @param filePath - Absolute file path to check
 * @returns Promise resolving to array of changed files (1 or 2 items if renamed)
 */
export const getFileStatus = async (filePath: string): Promise<ChangedFile[]> => {
  const { git, gitRepoPath } = getGit(filePath) ?? {};
  if (!git || !gitRepoPath) {
    // Not in a git repository, assume new
    return [{ filepath: filePath, status: 'new' }];
  }

  // Normalize path separators for git commands (git uses forward slashes on all platforms)
  const normalizedGitFilePath = relative(gitRepoPath, filePath).replace(/\\/g, '/');

  try {
    // First check if file is tracked in git at the current path
    const isTracked = await git
      .raw(['ls-files', '--error-unmatch', normalizedGitFilePath])
      .then(() => true)
      .catch(() => false);

    if (isTracked) {
      // File is tracked at this path, it's modified
      return [{ filepath: filePath, status: 'modified' }];
    }

    // File is not tracked - quickly check if it's explicitly untracked.
    // This is much cheaper than scanning the full status or the history.
    try {
      const fileStatusOutput = await git.raw([
        'status',
        '--porcelain',
        '--',
        normalizedGitFilePath,
      ]);
      // If status shows ??, it's untracked (truly new)
      if (/^\?\?/.test(fileStatusOutput.trim())) {
        return [{ filepath: filePath, status: 'new' }];
      }
    } catch {
      // If status check fails, continue to rename/history checks
    }

    // Not tracked and not explicitly untracked - look for a rename or copy entry.
    try {
      const statusOutput = await git.raw(['status', '--porcelain']);
      const lines = statusOutput.split('\n').filter((line) => line.trim() !== '');

      for (const line of lines) {
        // Porcelain v1 prints renames/copies as "XY <old> -> <new>" where X is R or C.
        // (The tab-separated "R100\told\tnew" layout belongs to `git diff --name-status`
        // and is never produced by `git status --porcelain`.)
        // Paths that git wraps in quotes are not unquoted here and will not match.
        const entry = line.match(/^([RC]). (.+) -> (.+)$/);
        if (!entry) {
          continue;
        }

        const [, kind, oldPath, newPath] = entry;
        if (!oldPath || !newPath) {
          continue;
        }

        // Only entries whose destination is the file being inspected are relevant
        const newFullPath = join(gitRepoPath, newPath);
        if (newFullPath !== filePath) {
          continue;
        }

        if (kind === 'R') {
          return [
            { filepath: join(gitRepoPath, oldPath), status: 'removed' },
            { filepath: newFullPath, status: 'new' },
          ];
        }

        // For copies, the old file still exists, so only return the new one
        return [{ filepath: newFullPath, status: 'new' }];
      }
    } catch {
      // If status check fails, continue to history check
    }

    // Last resort: check whether the path appears anywhere in git history (slow).
    // --follow tracks renames; -1 limits the output to one commit for performance.
    try {
      const hasHistory = await git
        .raw([
          'log',
          '--all',
          '--full-history',
          '--follow',
          '--oneline',
          '-1',
          '--',
          normalizedGitFilePath,
        ])
        .then((output) => output.trim().length > 0)
        .catch(() => false);

      // A path with history that is not currently tracked is reported as 'modified';
      // a path git has never seen is 'new'. The previous location of a moved file is
      // not looked up here.
      return [{ filepath: filePath, status: hasHistory ? 'modified' : 'new' }];
    } catch {
      // If we can't determine, default to 'new'
      return [{ filepath: filePath, status: 'new' }];
    }
  } catch {
    // If we can't determine status, default to 'new'
    return [{ filepath: filePath, status: 'new' }];
  }
};
200
+
/**
 * Tell whether a file should be treated as changed.
 *
 * An empty `changedFiles` list means no diff filtering is in effect, so every
 * file counts as changed. Otherwise the path must be exactly equal to the
 * `filepath` of one of the entries.
 *
 * @param filePath - File path to check
 * @param changedFiles - Array of changed file objects
 * @returns True if file has changed
 */
export const isFileChanged = (filePath: string, changedFiles: ChangedFile[]): boolean => {
  // No entries at all: filtering is disabled, process everything
  if (changedFiles.length === 0) {
    return true;
  }

  for (const { filepath } of changedFiles) {
    if (filepath === filePath) {
      return true;
    }
  }
  return false;
};
@@ -0,0 +1,43 @@
1
+ import { simpleGit, type SimpleGit } from 'simple-git';
2
+ import { findUpSync } from 'find-up';
3
+ import { dirname, join } from 'node:path';
4
+ import { existsSync } from 'node:fs';
5
+
6
+ const gitCache = new Map<string, SimpleGit>();
7
+
/**
 * Resolve the project root (git repository root) for a given path.
 *
 * If `filePath` itself contains a `.git` entry it is returned unchanged. Otherwise
 * the nearest `.git` entry is searched for upwards from the parent of `filePath`,
 * and the directory that contains it is returned.
 *
 * Both branches return the root without a trailing path separator, so the value
 * is stable as a cache key and safe to concatenate with relative paths.
 *
 * @param filePath - File or directory path to resolve from
 * @returns Project root path or undefined if not in a git repository
 */
export const resolveProjectRoot = (filePath: string): string | undefined => {
  // if we are in the root of the git repository, return the root
  if (existsSync(join(filePath, '.git'))) {
    return filePath;
  }

  const gitEntryPath = findUpSync('.git', { cwd: dirname(filePath), type: 'both' });
  // dirname() yields the containing directory; stripping ".git" with a regex
  // would leave a trailing separator behind
  return gitEntryPath ? dirname(gitEntryPath) : undefined;
};
22
+
/**
 * Look up (or lazily create) the SimpleGit client for the repository that
 * contains the given path.
 *
 * Clients are memoised in `gitCache`, keyed by repository root, so each
 * repository gets a single instance.
 *
 * @param filePath - File path to get git instance for
 * @returns Git instance and repository path, or undefined if not in a git repository
 */
export const getGit = (
  filePath: string,
): { git: SimpleGit | undefined; gitRepoPath: string } | undefined => {
  const gitRepoPath = resolveProjectRoot(filePath);
  if (!gitRepoPath) {
    return undefined;
  }

  let git = gitCache.get(gitRepoPath);
  if (!git) {
    // First request for this repository: create the client and remember it
    git = simpleGit(gitRepoPath);
    gitCache.set(gitRepoPath, git);
  }

  return { git, gitRepoPath };
};
@@ -0,0 +1,19 @@
1
+ // Re-export all types
2
+ export type {
3
+ GitMetadata,
4
+ GitDiffOptions,
5
+ FileChangeStatus,
6
+ ChangedFile,
7
+ } from './types.js';
8
+
9
+ // Re-export git client utilities
10
+ export { resolveProjectRoot, getGit } from './git-client.js';
11
+
12
+ // Re-export metadata functions
13
+ export { extractGitMetadata } from './metadata.js';
14
+
15
+ // Re-export file change functions
16
+ export { getChangedFiles, getFileStatus, isFileChanged } from './file-changes.js';
17
+
18
+ // Re-export status functions
19
+ export { getGitStatus } from './status.js';
@@ -0,0 +1,47 @@
1
+ import { relative } from 'node:path';
2
+ import type { GitMetadata } from './types.js';
3
+ import { getGit } from './git-client.js';
4
+
/**
 * Generate a GitHub link for a file.
 *
 * Accepts remote URLs that contain `github.com` followed by `:` or `/`, then
 * `<owner>/<repo>`, with an optional `.git` suffix and an optional trailing
 * slash. Repository names may contain dots (e.g. `socket.io`).
 *
 * @param gitRemoteUrl - Git remote URL
 * @param filePath - Relative file path from repository root
 * @param slug - Git reference (branch/tag/commit), defaults to 'main'
 * @returns GitHub permalink URL or undefined if not a GitHub repository
 * @internal
 */
const generateGithubPermalink = (
  gitRemoteUrl: string,
  filePath: string,
  slug?: string,
): string | undefined => {
  // The repo group is lazy so that an optional ".git" suffix is split off while
  // dots that are part of the repository name are kept.
  const githubMatch = gitRemoteUrl.match(/github\.com[:/]([^/]+)\/([^/]+?)(?:\.git)?\/?$/);
  if (!githubMatch) {
    return undefined;
  }

  const [, owner, repo] = githubMatch;
  return `https://github.com/${owner}/${repo}/blob/${slug ?? 'main'}/${filePath}`;
};
25
+
/**
 * Extract git metadata for a file.
 *
 * Reads the most recent commit that touched the file and the `remote.origin.url`
 * config value, and builds a GitHub link when the remote is a GitHub URL. The
 * link is generated without an explicit ref, so it uses the permalink helper's
 * default ref.
 *
 * @param filePath - Absolute file path
 * @returns Git metadata or undefined if not in a git repository. `git_link` is
 *   undefined when there is no origin remote or it is not a GitHub URL; the commit
 *   fields are undefined when the log has no entry for the file.
 */
export const extractGitMetadata = async (filePath: string): Promise<GitMetadata | undefined> => {
  const { git, gitRepoPath: gitRepoRoot } = getGit(filePath) ?? {};
  if (!git || !gitRepoRoot) {
    return undefined;
  }

  // Git pathspecs and GitHub URLs use forward slashes on every platform, while
  // relative() returns platform separators
  const gitFilePath = relative(gitRepoRoot, filePath).replace(/\\/g, '/');

  const { latest } = await git.log({ file: gitFilePath, maxCount: 1 });
  const { value: gitRemoteUrl } = await git.getConfig('remote.origin.url');
  const git_link = gitRemoteUrl ? generateGithubPermalink(gitRemoteUrl, gitFilePath) : undefined;

  return {
    git_link,
    git_commit_hash: latest?.hash,
    git_commit_date: latest?.date,
  };
};
@@ -0,0 +1,48 @@
1
+ import { resolveProjectRoot, getGit } from './git-client.js';
2
+
/**
 * Collect a small summary of the repository state, intended for debugging.
 *
 * Reports the current branch name, the abbreviated HEAD commit, and counts of
 * staged and unstaged files. The unstaged count is the sum of the `modified`,
 * `deleted` and `not_added` lists of the status result.
 *
 * @param cwd - Working directory
 * @returns Git status information
 * @throws Error when `cwd` is not inside a git repository, when no git client
 *   is available, or when any of the git commands fails
 */
export const getGitStatus = async (
  cwd: string = process.cwd(),
): Promise<{
  branch: string;
  commit: string;
  hasChanges: boolean;
  stagedFiles: number;
  unstagedFiles: number;
}> => {
  if (!resolveProjectRoot(cwd)) {
    throw new Error('Not in a git repository');
  }

  const client = getGit(cwd)?.git;
  if (!client) {
    throw new Error('Failed to initialize git client');
  }

  try {
    const branchName = await client.revparse(['--abbrev-ref', 'HEAD']);
    const shortHash = await client.revparse(['--short', 'HEAD']);
    const { staged, modified, deleted, not_added } = await client.status();

    const stagedFiles = staged.length;
    const unstagedFiles = modified.length + deleted.length + not_added.length;
    const hasChanges = stagedFiles > 0 || unstagedFiles > 0;

    return {
      branch: branchName.trim(),
      commit: shortHash.trim(),
      hasChanges,
      stagedFiles,
      unstagedFiles,
    };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to get git status: ${reason}`);
  }
};
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Git metadata extracted from repository
+ *
+ * Every field is optional (the type is wrapped in Partial). In the code shown
+ * here, extractGitMetadata fills git_link, git_commit_hash and git_commit_date;
+ * it does not set git_remote_url.
3
+ */
4
+ export type GitMetadata = Partial<{
5
+ git_remote_url: string;
6
+ git_commit_hash: string;
7
+ git_commit_date: string;
8
+ git_link: string;
9
+ }>;
10
+
11
+ /**
12
+ * Git diff options for filtering changed files
+ *
+ * Consumed by getChangedFiles.
13
+ */
14
+ export interface GitDiffOptions {
15
+ /** Enable diff-based file filtering; when false, getChangedFiles returns an empty list */
16
+ diff: boolean;
17
+ /** Git reference to compare against (default: HEAD~1) */
18
+ baseRef?: string;
19
+ /** Working directory for git operations (getChangedFiles falls back to process.cwd()) */
20
+ cwd?: string;
21
+ }
22
+
23
+ /**
24
+ * File change status
+ *
+ * getChangedFiles maps git's A to 'new', M to 'modified' and D to 'removed';
+ * a rename is reported as 'removed' for the old path plus 'new' for the new path.
25
+ */
26
+ export type FileChangeStatus = 'new' | 'modified' | 'removed';
27
+
28
+ /**
29
+ * Changed file information
+ *
+ * isFileChanged compares filepath to the candidate path with strict string
+ * equality, so both sides must be spelled the same way.
30
+ */
31
+ export interface ChangedFile {
32
+ /** Absolute file path */
33
+ filepath: string;
34
+ /** Change status: new, modified, or removed */
35
+ status: FileChangeStatus;
36
+ }
@@ -0,0 +1,5 @@
1
+ // Re-export all types
2
+ export type { MarkdownMetadata, MarkdownDocument } from './types.js';
3
+
4
+ // Re-export parser functions
5
+ export { isMarkdownFile, parseMarkdown, parseMarkdownFile } from './parser.js';
@@ -0,0 +1,92 @@
1
+ import { readFileSync } from 'node:fs';
2
+ import { assert } from 'node:console';
3
+
4
+ import { default as grayMatter } from 'gray-matter';
5
+
6
+ import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
7
+
8
+ import type { SourceFile } from '../types.js';
9
+ import type { MarkdownDocument, MarkdownMetadata } from './types.js';
10
+ import { generateChunkId } from '../generate-chunk-id.js';
11
+
// Options handed to RecursiveCharacterTextSplitter when markdown content is chunked.
// NOTE(review): chunkSize/chunkOverlap are presumably measured in characters and the
// separators presumably tried in the listed order — confirm against the splitter docs.
12
+ const markdownConfig = {
13
+ chunkSize: 3000,
14
+ chunkOverlap: 300,
15
+ keepSeparator: true,
16
+ separators: [
17
+ '\n# ', // headings, level 1 to level 6
18
+ '\n## ',
19
+ '\n### ',
20
+ '\n#### ',
21
+ '\n##### ',
22
+ '\n###### ',
23
+ '\n```', // code fence
24
+ '\n---\n', // horizontal rule
25
+ '\n\n', // blank line between paragraphs
26
+ ],
27
+ };
28
+
/**
 * Decide from the file name alone whether a path points to markdown or MDX.
 *
 * The comparison is case-sensitive and only looks at the end of the string.
 *
 * @param filePath - File path to check
 * @returns True if file has .md or .mdx extension
 */
export const isMarkdownFile = (filePath: string): boolean => {
  const markdownExtensions = ['.md', '.mdx'];
  return markdownExtensions.some((extension) => filePath.endsWith(extension));
};
37
+
/**
 * Split markdown or MDX content into documents ready for indexing.
 *
 * Front matter is separated from the body first. Each front-matter key is copied
 * into the attributes under an `md_` prefix, next to a fixed `type: 'markdown'`
 * entry. The body is then split with the module-level `markdownConfig`, and every
 * chunk becomes one document whose id is derived from `source` and the chunk index.
 * All chunks of one call share the same attributes object.
 *
 * @param content - Markdown or MDX content string
 * @param source - Source file path
 * @returns Array of markdown documents
 */
export const parseMarkdown = async <T extends Record<string, unknown> = Record<string, unknown>>(
  content: string,
  source: string,
): Promise<MarkdownDocument<T>[]> => {
  const parsed = grayMatter(content);

  // Start with the fixed type marker, then add the prefixed front-matter values
  const attributes: Record<string, unknown> = { type: 'markdown' };
  for (const [key, value] of Object.entries(parsed.data)) {
    attributes[`md_${key}`] = value;
  }

  const splitter = new RecursiveCharacterTextSplitter(markdownConfig);
  const pieces = await splitter.splitText(parsed.content);

  const documents: MarkdownDocument<T>[] = [];
  for (const [position, piece] of pieces.entries()) {
    documents.push({
      id: generateChunkId(source, position),
      pageContent: piece,
      metadata: {
        source,
        attributes: attributes as MarkdownMetadata<T>['attributes'],
      },
    });
  }
  return documents;
};
71
+
/**
 * Read a markdown or MDX file from disk and split it into documents.
 *
 * The file is read synchronously as UTF-8. Documents are labelled with the
 * file's `relativePath` when present, otherwise with its `path`, and each
 * document's metadata additionally receives `rootPath` from `file.projectRoot`.
 *
 * @param file - Source file object
 * @returns Array of markdown documents with root path metadata
 */
export const parseMarkdownFile = async <
  T extends Record<string, unknown> = Record<string, unknown>,
>(
  file: SourceFile,
): Promise<MarkdownDocument<T>[]> => {
  // `assert` is imported from node:console in this module, so a failed check is
  // only reported on the console; execution continues with the file regardless.
  assert(isMarkdownFile(file.path), `File ${file.path} is not a markdown or MDX file`);

  const sourceLabel = file.relativePath ?? file.path;
  const documents = await parseMarkdown<T>(readFileSync(file.path, 'utf8'), sourceLabel);

  const withRootPath = [];
  for (const document of documents) {
    withRootPath.push({
      ...document,
      metadata: {
        ...document.metadata,
        rootPath: file.projectRoot,
      },
    });
  }
  return withRootPath;
};
@@ -0,0 +1,20 @@
1
+ import type {
2
+ VectorStoreDocument,
3
+ VectorStoreDocumentMetadata,
4
+ } from '@equinor/fusion-framework-module-ai/lib';
5
+
6
+ /**
7
+ * Markdown document metadata
+ *
+ * Vector-store document metadata whose attribute type is T combined with a
+ * fixed `type: 'markdown'` marker. T describes any additional attributes.
8
+ */
9
+ export type MarkdownMetadata<T extends Record<string, unknown> = Record<string, unknown>> =
10
+ VectorStoreDocumentMetadata<
11
+ T & {
12
+ type: 'markdown';
13
+ }
14
+ >;
15
+
16
+ /**
17
+ * Markdown document
+ *
+ * A vector-store document that carries MarkdownMetadata; this is the element
+ * type returned by parseMarkdown and parseMarkdownFile.
18
+ */
19
+ export type MarkdownDocument<T extends Record<string, unknown> = Record<string, unknown>> =
20
+ VectorStoreDocument<MarkdownMetadata<T>>;
@@ -0,0 +1,44 @@
1
+ import { dirname } from 'node:path';
2
+ import { readPackageUp, type PackageJson } from 'read-package-up';
3
+
/**
 * Cache of resolved manifests, keyed by directory path.
 *
 * A directory is only stored once it is known that the nearest package.json at or
 * above it is the cached one, so a lookup is an exact key match and never a
 * prefix comparison.
 */
const packageMap = new Map<string, PackageJson>();

/** True when a path contains a `.` or `..` segment, i.e. walking it with dirname() is unreliable. */
const hasDotSegment = (path: string): boolean => /(?:^|[\\/])\.\.?(?:[\\/]|$)/.test(path);

/**
 * Resolves which package a file path belongs to.
 * First checks the cache map, then uses read-package-up if no match found.
 *
 * The cache is keyed by the file's directory. Matching on a bare string prefix
 * would return the wrong manifest for nested packages (a cached monorepo root
 * matches every file below it) and for sibling directories that share a prefix
 * (`packages/cli` vs `packages/cli-plugin`).
 *
 * @param filePath - Absolute or relative file path (e.g., '/path/to/packages/cli/src/index.ts')
 * @returns Package.json if found, undefined otherwise
 *
 * @example
 * ```ts
 * const packageJson = await resolvePackage('/path/to/packages/cli/src/index.ts');
 * ```
 */
export async function resolvePackage(filePath: string): Promise<PackageJson | undefined> {
  // readPackageUp expects a directory path, not a file path
  const dirPath = dirname(filePath);

  const cached = packageMap.get(dirPath);
  if (cached) {
    return cached;
  }

  const result = await readPackageUp({ cwd: dirPath, normalize: false });
  if (!result) {
    return undefined;
  }

  // Directory where the package.json that was found is located
  const packageDir = dirname(result.path);

  // Every directory between dirPath and packageDir resolves to the same manifest,
  // because the upward search passed through them without finding another one.
  const resolvedDirs = [dirPath];
  if (!hasDotSegment(dirPath)) {
    let current = dirPath;
    while (current !== packageDir) {
      const parent = dirname(current);
      if (parent === current) {
        // Reached the top without meeting packageDir (e.g. relative input):
        // only the starting directory is safe to cache
        resolvedDirs.length = 1;
        break;
      }
      resolvedDirs.push(parent);
      current = parent;
    }
  }

  for (const dir of resolvedDirs) {
    packageMap.set(dir, result.packageJson);
  }

  return result.packageJson;
}
@@ -0,0 +1,13 @@
1
+ import { SyntaxKind } from 'ts-morph';
2
+
3
+ /**
4
+ * Supported TSDoc node kinds for top-level processing
+ *
+ * ts-morph SyntaxKind values for function, class, interface, type alias,
+ * variable statement and enum declarations.
+ * NOTE(review): presumably used to select which top-level nodes are extracted;
+ * the consumer is not visible here — confirm in the ts-doc parser.
5
+ */
6
+ export const nodeKinds = [
7
+ SyntaxKind.FunctionDeclaration,
8
+ SyntaxKind.ClassDeclaration,
9
+ SyntaxKind.InterfaceDeclaration,
10
+ SyntaxKind.TypeAliasDeclaration,
11
+ SyntaxKind.VariableStatement,
12
+ SyntaxKind.EnumDeclaration,
13
+ ];