@opensumi/ide-ai-native 3.0.3 → 3.0.4-next-1716367246.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/lib/browser/inline-completions/completeProvider.d.ts +5 -3
  2. package/lib/browser/inline-completions/completeProvider.d.ts.map +1 -1
  3. package/lib/browser/inline-completions/completeProvider.js +16 -30
  4. package/lib/browser/inline-completions/completeProvider.js.map +1 -1
  5. package/lib/browser/inline-completions/constants.d.ts +2 -6
  6. package/lib/browser/inline-completions/constants.d.ts.map +1 -1
  7. package/lib/browser/inline-completions/constants.js +81 -4
  8. package/lib/browser/inline-completions/constants.js.map +1 -1
  9. package/lib/browser/inline-completions/prompt/const.d.ts +9 -0
  10. package/lib/browser/inline-completions/prompt/const.d.ts.map +1 -0
  11. package/lib/browser/inline-completions/prompt/const.js +284 -0
  12. package/lib/browser/inline-completions/prompt/const.js.map +1 -0
  13. package/lib/browser/inline-completions/prompt/importedFiles.d.ts +6 -0
  14. package/lib/browser/inline-completions/prompt/importedFiles.d.ts.map +1 -0
  15. package/lib/browser/inline-completions/prompt/importedFiles.js +77 -0
  16. package/lib/browser/inline-completions/prompt/importedFiles.js.map +1 -0
  17. package/lib/browser/inline-completions/prompt/jaccardMatcher.d.ts +31 -0
  18. package/lib/browser/inline-completions/prompt/jaccardMatcher.d.ts.map +1 -0
  19. package/lib/browser/inline-completions/prompt/jaccardMatcher.js +75 -0
  20. package/lib/browser/inline-completions/prompt/jaccardMatcher.js.map +1 -0
  21. package/lib/browser/inline-completions/prompt/languages.d.ts +4 -0
  22. package/lib/browser/inline-completions/prompt/languages.d.ts.map +1 -0
  23. package/lib/browser/inline-completions/prompt/languages.js +67 -0
  24. package/lib/browser/inline-completions/prompt/languages.js.map +1 -0
  25. package/lib/browser/inline-completions/prompt/matcher.d.ts +42 -0
  26. package/lib/browser/inline-completions/prompt/matcher.d.ts.map +1 -0
  27. package/lib/browser/inline-completions/prompt/matcher.js +279 -0
  28. package/lib/browser/inline-completions/prompt/matcher.js.map +1 -0
  29. package/lib/browser/inline-completions/prompt/prompt.d.ts +24 -0
  30. package/lib/browser/inline-completions/prompt/prompt.d.ts.map +1 -0
  31. package/lib/browser/inline-completions/prompt/prompt.js +242 -0
  32. package/lib/browser/inline-completions/prompt/prompt.js.map +1 -0
  33. package/lib/browser/inline-completions/prompt/similarSnippets.d.ts +9 -0
  34. package/lib/browser/inline-completions/prompt/similarSnippets.d.ts.map +1 -0
  35. package/lib/browser/inline-completions/prompt/similarSnippets.js +110 -0
  36. package/lib/browser/inline-completions/prompt/similarSnippets.js.map +1 -0
  37. package/lib/browser/inline-completions/prompt/tokenizer.d.ts +4 -0
  38. package/lib/browser/inline-completions/prompt/tokenizer.d.ts.map +1 -0
  39. package/lib/browser/inline-completions/prompt/tokenizer.js +18 -0
  40. package/lib/browser/inline-completions/prompt/tokenizer.js.map +1 -0
  41. package/lib/browser/inline-completions/provider.d.ts +5 -12
  42. package/lib/browser/inline-completions/provider.d.ts.map +1 -1
  43. package/lib/browser/inline-completions/provider.js +79 -44
  44. package/lib/browser/inline-completions/provider.js.map +1 -1
  45. package/lib/browser/inline-completions/types.d.ts +154 -0
  46. package/lib/browser/inline-completions/types.d.ts.map +1 -0
  47. package/lib/browser/inline-completions/types.js +59 -0
  48. package/lib/browser/inline-completions/types.js.map +1 -0
  49. package/lib/browser/languages/parser.d.ts +6 -0
  50. package/lib/browser/languages/parser.d.ts.map +1 -1
  51. package/lib/browser/languages/parser.js +140 -1
  52. package/lib/browser/languages/parser.js.map +1 -1
  53. package/lib/browser/languages/tree-sitter/language-facts/types.d.ts +1 -1
  54. package/lib/browser/languages/tree-sitter/wasm-manager.d.ts +2 -2
  55. package/lib/browser/languages/tree-sitter/wasm-manager.d.ts.map +1 -1
  56. package/lib/browser/languages/tree-sitter/wasm-manager.js +2 -2
  57. package/lib/browser/languages/tree-sitter/wasm-manager.js.map +1 -1
  58. package/lib/common/utils.d.ts +5 -0
  59. package/lib/common/utils.d.ts.map +1 -0
  60. package/lib/common/utils.js +26 -0
  61. package/lib/common/utils.js.map +1 -0
  62. package/package.json +20 -19
  63. package/src/browser/inline-completions/completeProvider.ts +26 -35
  64. package/src/browser/inline-completions/constants.ts +87 -7
  65. package/src/browser/inline-completions/prompt/const.ts +286 -0
  66. package/src/browser/inline-completions/prompt/importedFiles.ts +92 -0
  67. package/src/browser/inline-completions/prompt/jaccardMatcher.ts +98 -0
  68. package/src/browser/inline-completions/prompt/languages.ts +65 -0
  69. package/src/browser/inline-completions/prompt/matcher.ts +328 -0
  70. package/src/browser/inline-completions/prompt/prompt.ts +297 -0
  71. package/src/browser/inline-completions/prompt/similarSnippets.ts +130 -0
  72. package/src/browser/inline-completions/prompt/tokenizer.ts +16 -0
  73. package/src/browser/inline-completions/provider.ts +107 -42
  74. package/src/browser/inline-completions/types.ts +169 -0
  75. package/src/browser/languages/parser.ts +147 -0
  76. package/src/browser/languages/tree-sitter/wasm-manager.ts +3 -3
  77. package/src/common/utils.ts +23 -0
@@ -0,0 +1,297 @@
1
+ // @ts-ignore
2
+ import { Tiktoken } from 'js-tiktoken';
3
+
4
+ import { Injector } from '@opensumi/di';
5
+
6
+ import { LanguageParser } from '../../languages/parser';
7
+ import { LanguageParserService } from '../../languages/service';
8
+ import { StrategyType, WishListAttributeName } from '../types';
9
+
10
+ import { LANGUAGE_COMMENT_MARKERS } from './const';
11
+ import { getTokenizer } from './tokenizer';
12
+
13
+ import type { ICompletionContext, ICompletionModel, MarkerItem } from '../types';
14
+ import type * as monaco from '@opensumi/ide-monaco';
15
+
16
/**
 * Terminates a prompt fragment with a newline so fragments can be concatenated.
 *
 * @param commentText Fragment to terminate; may be omitted.
 * @returns `commentText` followed by `\n`, or an empty string when `commentText`
 * is empty or undefined.
 */
export const addComment = (commentText?: string) => (commentText ? `${commentText}\n` : '');
22
+
23
/**
 * Wraps `text` in the comment delimiters registered for `language` in
 * `LANGUAGE_COMMENT_MARKERS`.
 *
 * @param text Content to place inside the comment.
 * @param language Language id used as the lookup key.
 * @returns `<start> <text>`, followed by ` <end>` when the registered end
 * delimiter is not the empty string; an empty string when the language has no
 * registered marker.
 */
export const getMarkerByLanguage = (text: string, language: string) => {
  const delimiters = LANGUAGE_COMMENT_MARKERS[language];
  if (!delimiters) {
    return '';
  }
  // An empty `end` means no closing delimiter is appended.
  const closing = delimiters.end === '' ? '' : ` ${delimiters.end}`;
  return `${delimiters.start} ${text}${closing}`;
};
31
+
32
/**
 * Builds the first-line marker that tells the model which language the file is
 * written in.
 *
 * A few languages use a fixed header (doctype, shebang, ...); every other
 * language gets a `Language: <id>` comment in its own comment syntax.
 *
 * @param language Language id of the current document.
 * @returns The marker text, or an empty string when `language` is empty or has
 * no registered comment syntax.
 */
export const getLanguageMarker = (language: string) => {
  if (!language) {
    return '';
  }
  const fixedHeaders: {
    [key: string]: string;
  } = {
    html: '<!DOCTYPE html>',
    python: '#!/usr/bin/env python3',
    ruby: '#!/usr/bin/env ruby',
    shellscript: '#!/bin/sh',
    yaml: '# YAML data',
  };
  const fixedHeader = fixedHeaders[language];
  if (fixedHeader) {
    return fixedHeader;
  }
  return getMarkerByLanguage(`Language: ${language}`, language);
};
53
+
54
/**
 * Builds a `Path: <path>` comment using the comment syntax of `language`.
 *
 * @param path File path to advertise; a falsy value is returned unchanged.
 * @param language Language id that selects the comment syntax.
 */
export const getPathMarker = (path: string, language: string) => {
  if (!path) {
    return path;
  }
  return getMarkerByLanguage(`Path: ${path}`, language);
};
56
+
57
/**
 * Comments out every line of `text` individually using the comment syntax of
 * `language`.
 *
 * @param text Possibly multi-line snippet.
 * @param language Language id that selects the comment syntax.
 * @returns The commented lines joined with `\n`.
 */
export const getMarkerForSnippets = (text: string, language: string) => {
  const commentedLines: string[] = [];
  for (const line of text.split('\n')) {
    commentedLines.push(getMarkerByLanguage(line, language));
  }
  return commentedLines.join('\n');
};
61
+
62
/**
 * Crops `text` to whole lines so that the kept lines stay below a token budget.
 *
 * Lines are accumulated from the end closest to the cursor: from the last line
 * backwards by default, or from the first line forwards when `reverse` is true.
 * Accumulation stops at the first line that makes the running total reach
 * `maxTokenSize`; that line and everything beyond it is dropped in both
 * directions.
 *
 * @param text Text to crop.
 * @param maxTokenSize Token budget; the kept lines total strictly fewer tokens.
 * @param textTokens Token ids per `\n`-separated line of `text`. A missing entry
 * is treated as an empty line instead of throwing.
 * @param reverse When true keep the head of the text, otherwise keep the tail.
 * @returns `text` unchanged when it already fits, otherwise the kept lines
 * joined with `\n` (possibly an empty string).
 */
export const getCroppedTextByLine = (text: string, maxTokenSize: number, textTokens: number[][], reverse = false) => {
  const totalTokenSize = textTokens.reduce((sum, lineTokens) => sum + lineTokens.length, 0);
  if (totalTokenSize < maxTokenSize) {
    return text;
  }
  const lines = text.split('\n');
  const lineCount = lines.length;
  // `textTokens` may not line up with `lines` (e.g. the text was sliced after tokenizing).
  const tokenSizeOfLine = (lineIndex: number) => textTokens[lineIndex]?.length ?? 0;
  let usedTokenSize = 0;

  if (reverse) {
    let index = 0;
    while (index < lineCount) {
      usedTokenSize += tokenSizeOfLine(index);
      if (usedTokenSize >= maxTokenSize) {
        break;
      }
      index++;
    }
    // `index` is the first line that does not fit; keep everything before it.
    return index === lineCount ? text : lines.slice(0, index).join('\n');
  }

  let index = lineCount - 1;
  while (index >= 0) {
    usedTokenSize += tokenSizeOfLine(index);
    if (usedTokenSize >= maxTokenSize) {
      break;
    }
    index--;
  }
  // `index` is the line that overflowed the budget; keep only the lines after it.
  return index === -1 ? text : lines.slice(index + 1).join('\n');
};
101
+
102
/**
 * Crops `text` so that it fits into a token budget.
 *
 * Strategy selection:
 * - `StrategyType.InterceptBasedOnLine`: delegate to `getCroppedTextByLine`.
 * - `StrategyType.InterceptBasedOnFunction` with `reverse === false`: repeatedly
 *   keep a proportional tail of the text and let the parser trim the broken
 *   leading syntax; falls back to line cropping when the parser throws.
 * - anything else (including function strategy with `reverse === true`): drop
 *   as many characters as there are surplus tokens until the text fits.
 *
 * @param text Text to crop.
 * @param maxTokenSize Maximum number of tokens the result may contain.
 * @param textTokens Token ids per `\n`-separated line of `text`; used by the line strategy.
 * @param strategy Cropping strategy, line based by default.
 * @param tokenizer Tokenizer used to measure the text.
 * @param parser Optional language parser used by the function strategy.
 * @param minBlockSize Forwarded to the parser's trim helper.
 * @param reverse When true the head of the text is kept (text after the cursor),
 * otherwise the tail is kept (text before the cursor).
 * @param token Optional cancellation token, checked after each parser call.
 * @returns The cropped text, or an empty string when cancellation was requested
 * during function based cropping.
 */
export const getCroppedText = async (
  text: string,
  maxTokenSize: number,
  textTokens: number[][],
  strategy = StrategyType.InterceptBasedOnLine,
  tokenizer: Tiktoken,
  parser?: LanguageParser,
  minBlockSize = 20,
  reverse = false,
  token?: monaco.CancellationToken,
): Promise<string> => {
  if (strategy === StrategyType.InterceptBasedOnLine) {
    return getCroppedTextByLine(text, maxTokenSize, textTokens, reverse);
  }

  let tokens: number[] = tokenizer.encode(text);

  if (strategy === StrategyType.InterceptBasedOnFunction && !reverse) {
    while (tokens.length > maxTokenSize && text.length > 0) {
      try {
        // Keep a share of the characters proportional to the share of tokens allowed,
        // but always drop at least one character so the loop is guaranteed to end.
        const proportionalLength = Math.ceil((maxTokenSize / tokens.length) * text.length);
        const keepLength = Math.max(0, Math.min(text.length - 1, proportionalLength));
        // `slice(text.length - n)` instead of `slice(-n)`: `slice(-0)` would keep everything.
        text = text.slice(text.length - keepLength);
        text = (await parser?.trimPrefixSyntaxErrors(text, minBlockSize)) || text;
        if (token?.isCancellationRequested) {
          return '';
        }
      } catch {
        // `text` has been sliced, so re-tokenize per line before cropping by line.
        const lineTokens = text.split('\n').map((line) => tokenizer.encode(line));
        text = getCroppedTextByLine(text, maxTokenSize, lineTokens, reverse);
      }
      tokens = tokenizer.encode(text);
    }
    return text;
  }

  // Character based cropping: remove one character per surplus token and re-measure.
  while (tokens.length > maxTokenSize) {
    const surplus = tokens.length - maxTokenSize;
    text = reverse ? text.slice(0, -surplus) : text.slice(surplus);
    tokens = tokenizer.encode(text);
  }
  return text;
};
164
+
165
/**
 * Assembles the part of the completion prompt that precedes the cursor.
 *
 * Items are processed in descending `priority`. Each item consumes tokens from
 * `leftTokenSize`; once the budget is used up the remaining items are skipped
 * and whatever has been assembled so far is returned. The final text is always
 * ordered language marker, path marker, imported files, similar snippets,
 * text before the cursor, one fragment per line.
 *
 * @param promptPriority Prompt items; the array is not modified.
 * @param context Completion context (language id is used for comment syntax and parsing).
 * @param promptConfig Model configuration holding the tokenizer name and wish list options.
 * @param leftTokenSize Token budget available for the whole prefix prompt.
 * @param injector Injector used to obtain the `LanguageParserService`.
 * @param token Cancellation token.
 * @returns The assembled prompt, or `undefined` when cancellation was requested
 * while cropping an item.
 */
export const getBeforePrompt = async (
  promptPriority: MarkerItem[],
  context: ICompletionContext,
  promptConfig: ICompletionModel,
  leftTokenSize: number,
  injector: Injector,
  token: monaco.CancellationToken,
) => {
  const { tokenizerName, wishList } = promptConfig;
  const tokenizer = getTokenizer(tokenizerName);

  // Sort a copy in descending priority; `Array.prototype.sort` would reorder the caller's array.
  const sortedPromptPriority = [...promptPriority].sort((a, b) => b.priority - a.priority);
  const promptList: {
    [key in WishListAttributeName]?: string;
  } = {};

  for (const current of sortedPromptPriority) {
    // Budget exhausted (also covers NaN / negative): keep what has been assembled so far.
    if (!(leftTokenSize > 0)) {
      break;
    }

    if (current.content) {
      const lineTokens = current.content.split('\n').map((line) => tokenizer.encode(line));
      const currentTokenSize = lineTokens.reduce((sum, tokensOfLine) => sum + tokensOfLine.length, 0);
      if (currentTokenSize > leftTokenSize) {
        const maxTokenSize = Math.ceil(current.maxPercent * leftTokenSize);
        const languageParserService = injector.get(LanguageParserService) as LanguageParserService;
        const languageParser = languageParserService.createParser(context.language);
        promptList[current.attributeName] = await getCroppedText(
          current.content,
          maxTokenSize,
          lineTokens,
          wishList.beforeCursor.strategy,
          tokenizer,
          languageParser,
          wishList.beforeCursor.extOption.minBlockSize ?? 75,
          false,
          token,
        );
        if (token.isCancellationRequested) {
          return;
        }
        leftTokenSize -= maxTokenSize;
      } else {
        leftTokenSize -= currentTokenSize;
        promptList[current.attributeName] = current.content;
      }
    } else if (current.importedFiles && current.importedFiles.length > 0) {
      const { patternPrefix, patternSuffix } = wishList.importedFile.extOption;
      const maxImportedFileTokenSize = Math.ceil(current.maxPercent * leftTokenSize);
      const importedFileBlocks: string[] = [];
      let currentImportedFileTokenSize = 0;
      for (const [filename, importedFile] of current.importedFiles) {
        const importedFileMarker = getMarkerByLanguage(
          `${patternPrefix}${filename}:${patternSuffix || '\n'}${getMarkerForSnippets(
            (importedFile as string[]).join('\n'),
            context.language,
          )}`,
          context.language,
        );
        if (!importedFileMarker) {
          // No comment syntax registered for this language: nothing to add.
          continue;
        }
        // Account for the newline that separates this block from the next one.
        const tokens = tokenizer.encode(`${importedFileMarker}\n`);
        if (tokens.length > leftTokenSize || currentImportedFileTokenSize + tokens.length > maxImportedFileTokenSize) {
          break;
        }
        leftTokenSize -= tokens.length;
        currentImportedFileTokenSize += tokens.length;
        importedFileBlocks.push(importedFileMarker);
      }
      if (importedFileBlocks.length > 0) {
        // Joining (instead of appending "\n" and chopping the last character) keeps the
        // closing delimiter intact for block-comment languages.
        promptList[current.attributeName] = importedFileBlocks.join('\n');
      }
    } else if (current.similarSnippets && current.similarSnippets.length > 0) {
      const { patternPrefix, patternSuffix } = wishList.similarFile.extOption;
      const maxSnippetSize = Math.ceil(current.maxPercent * leftTokenSize);
      const snippetBlocks: string[] = [];
      let currentSnippetSize = 0;
      for (const [filename, snippets] of current.similarSnippets) {
        const similarSnippetMarker = snippets
          .map((snippet) =>
            getMarkerByLanguage(
              `${patternPrefix}${filename}:${patternSuffix || '\n'}${getMarkerForSnippets(
                snippet.snippet,
                context.language,
              )}`,
              context.language,
            ),
          )
          .join('\n');
        const tokens = tokenizer.encode(similarSnippetMarker);
        if (tokens.length > leftTokenSize || currentSnippetSize + tokens.length > maxSnippetSize) {
          break;
        }
        leftTokenSize -= tokens.length;
        currentSnippetSize += tokens.length;
        snippetBlocks.push(similarSnippetMarker);
      }
      // Separate the blocks of different files so one file's last comment line
      // does not run into the next file's header.
      const currentSnippet = snippetBlocks.join('\n');
      if (currentSnippet) {
        promptList[current.attributeName] = currentSnippet;
      }
    }
  }

  const orderedFragments = [
    promptList.languageMarker,
    promptList.pathMarker,
    promptList.importedFile,
    promptList.similarFile,
    promptList.beforeCursor,
  ];
  // Every fragment ends with "\n"; drop the trailing one.
  return orderedFragments
    .map((fragment) => addComment(fragment))
    .join('')
    .slice(0, -'\n'.length);
};
267
+
268
/**
 * Produces the part of the completion prompt that follows the cursor.
 *
 * The suffix is tokenized line by line; when it exceeds
 * `suffixPercent * maxPromptTokenSize` tokens it is cropped from its end with
 * the strategy configured for `afterCursor`.
 *
 * @param context Completion context providing `suffix` and `language`.
 * @param promptConfig Model configuration (token budget, tokenizer, wish list).
 * @param injector Injector used to obtain the `LanguageParserService`.
 * @param token Cancellation token forwarded to the cropping routine.
 * @returns The suffix, cropped when necessary.
 */
export const getAfterPrompt = async (
  context: ICompletionContext,
  promptConfig: ICompletionModel,
  injector: Injector,
  token: monaco.CancellationToken,
) => {
  const { maxPromptTokenSize, tokenizerName, wishList } = promptConfig;
  const tokenizer = getTokenizer(tokenizerName);
  const suffix = context.suffix;
  const suffixOptions = wishList.afterCursor.extOption;

  const suffixLineTokens = suffix.split('\n').map((line) => tokenizer.encode(line));
  let suffixTokenSize = 0;
  for (const lineTokens of suffixLineTokens) {
    suffixTokenSize += lineTokens.length;
  }
  const suffixBudget = Math.ceil(suffixOptions.suffixPercent * maxPromptTokenSize);

  if (suffixTokenSize > suffixBudget) {
    const parserService = injector.get(LanguageParserService) as LanguageParserService;
    const languageParser = parserService.createParser(context.language);
    return getCroppedText(
      suffix,
      suffixBudget,
      suffixLineTokens,
      wishList.afterCursor.strategy,
      tokenizer,
      languageParser,
      wishList.afterCursor.extOption.minBlockSize ?? 25,
      true,
      token,
    );
  }
  return suffix;
};
@@ -0,0 +1,130 @@
1
+ /* eslint-disable no-case-declarations */
2
+ import { Injector } from '@opensumi/di';
3
+ import { IEditorDocumentModel, WorkbenchEditorService } from '@opensumi/ide-editor';
4
+ import { IWorkspaceService } from '@opensumi/ide-workspace';
5
+
6
+ import { isDocumentValid } from '../../../common/utils';
7
+ import {
8
+ ICompletionContext,
9
+ MatchSimilarSnippet,
10
+ NeighboringTabsOption,
11
+ ResourceDocument,
12
+ SimilarFileOptions,
13
+ } from '../types';
14
+
15
+ import { MAX_NEIGHBOR_AGGREGATE_LENGTH } from './const';
16
+ import { FixedWindowSizeJaccardMatcher } from './jaccardMatcher';
17
+
18
/**
 * Keeps only the opened documents accepted by `isDocumentValid`
 * (per the original note, this filters out oversized documents).
 *
 * @param docuemnts Documents of the currently opened tabs.
 * @returns A new array with the accepted documents, in their original order.
 */
export const getOpenedTabFileList = (docuemnts: IEditorDocumentModel[]) =>
  docuemnts.filter((candidate) => isDocumentValid(candidate));
23
+
24
/**
 * Placeholder for the list of recently edited files; currently always empty.
 *
 * @returns A fresh empty array.
 */
export const getRecentEditFileList = () => {
  return [];
};
25
+
26
/**
 * Collects candidate neighbouring documents for one source type.
 *
 * Only `NeighboringTabsOption.openFileHistory` is implemented: it returns all
 * opened documents that pass `getOpenedTabFileList`. `editFileHistory`,
 * `editFileRecent` and any unknown value yield an empty list.
 *
 * @param type Source of neighbouring documents.
 * @param injector Injector used to obtain the `WorkbenchEditorService`.
 */
export const getNeighboringDocument = async (type: NeighboringTabsOption, injector: Injector) => {
  if (type === NeighboringTabsOption.openFileHistory) {
    const workbenchEditorService = injector.get(WorkbenchEditorService) as WorkbenchEditorService;
    const openedDocuments = await workbenchEditorService.getAllOpenedDocuments();
    return getOpenedTabFileList(openedDocuments);
  }
  // Edit-history based sources are not supported yet.
  return [];
};
41
+
42
/**
 * Gathers the documents that may contain code similar to the file being edited.
 *
 * Documents are collected for every configured source in order, restricted to
 * the same language, stripped of the target file itself and of duplicates, and
 * finally capped at `option.neighboringTabsMaxNum` (20 when not a number).
 *
 * @param targetFileName Name of the file being completed; documents whose display
 * name ends with it are skipped.
 * @param languageId Language id the neighbours must share.
 * @param option Similar-file options (sources and maximum count).
 * @param injector Injector forwarded to `getNeighboringDocument`.
 * @returns One resource per kept document, with `offset` set to the end of its text.
 */
export const getNeighboringResource = async (
  targetFileName: string,
  languageId: string,
  option: SimilarFileOptions,
  injector: Injector,
) => {
  let documentList: IEditorDocumentModel[] = [];
  for (const type of option.neighboringTabsOption) {
    documentList = documentList.concat(await getNeighboringDocument(type, injector));
  }

  // Compare URIs by their string form: two URI instances for the same resource
  // are not reference-equal, so `===` would let duplicates through.
  const seenUris = new Set<string>();
  documentList = documentList.filter((document) => {
    if (document.languageId !== languageId || document.uri.displayName.endsWith(targetFileName)) {
      return false;
    }
    const uriKey = document.uri.toString();
    if (seenUris.has(uriKey)) {
      return false;
    }
    seenUris.add(uriKey);
    return true;
  });

  const neighboringTabsMaxNum =
    typeof option.neighboringTabsMaxNum === 'number' ? option.neighboringTabsMaxNum : 20;

  const resources: ResourceDocument[] = documentList.slice(0, neighboringTabsMaxNum).map((document) => {
    const content = document.getText();
    return {
      source: content,
      uri: document.uri,
      languageId: document.languageId,
      offset: content.length,
    };
  });
  return resources;
};
76
+
77
/**
 * Finds snippets in neighbouring files that resemble the current document.
 *
 * Neighbouring files of the same language are matched one by one with a
 * fixed-window Jaccard matcher. Scanning stops once `options.maxTime`
 * (default 1000 ms) has elapsed or the scanned source length exceeds
 * `MAX_NEIGHBOR_AGGREGATE_LENGTH`. Files whose first snippet scores above
 * `options.similarityThreshold` (default 0.6) are kept, sorted by that score
 * in descending order and limited to `options.snippetMaxNum` (default 4).
 *
 * @param context Completion context of the file being edited.
 * @param options Similar-file options.
 * @param injector Injector used to obtain editor and workspace services.
 * @returns `[relativePath, snippets]` pairs, or an empty array when there is no
 * current document.
 */
export const getSimilarSnippets = async (
  context: ICompletionContext,
  options: SimilarFileOptions,
  injector: Injector,
) => {
  const editorService = injector.get(WorkbenchEditorService) as WorkbenchEditorService;
  const workspaceService = injector.get(IWorkspaceService) as IWorkspaceService;

  // Same-language neighbours, capped and ordered according to the configured sources.
  const candidateFiles = await getNeighboringResource(context.filename, context.language, options, injector);

  // The matching window never exceeds the configured `windowSize`.
  const documentLineCount = (context.prefix + context.suffix).split('\n').length;
  const windowSize = documentLineCount > options.windowSize ? options.windowSize : documentLineCount;
  const matcherFactory = FixedWindowSizeJaccardMatcher.factory(windowSize);

  const activeDocument = editorService.currentEditor?.currentDocumentModel;
  if (!activeDocument) {
    return [];
  }
  const activeText = activeDocument.getText();
  const matcher = matcherFactory.to({
    source: activeText,
    uri: activeDocument.uri,
    languageId: activeDocument.languageId,
    offset: activeText.length,
  });

  const matchedFiles: [string, MatchSimilarSnippet[]][] = [];
  const timeBudget = options.maxTime ?? 1000;
  const startedAt = Date.now();
  let scannedLength = 0;
  for (const candidate of candidateFiles) {
    scannedLength += candidate.source.length;
    const matches = matcher.findMatches(candidate, options.snippetSelectionMode);
    if (matches && matches.length) {
      const relativePath = await workspaceService.asRelativePath(candidate.uri);
      if (relativePath?.path) {
        matchedFiles.push([relativePath.path, matches]);
      }
    }
    // Stop when out of time or when the aggregate scanned length exceeds the cap.
    if (Date.now() - startedAt > timeBudget || scannedLength > MAX_NEIGHBOR_AGGREGATE_LENGTH) {
      break;
    }
  }

  // Rank by the score of each file's first snippet and keep the best ones.
  const aboveThreshold = matchedFiles.filter(
    ([, snippets]) => snippets[0] && snippets[0].score > (options.similarityThreshold ?? 0.6),
  );
  aboveThreshold.sort((left, right) => right[1][0].score - left[1][0].score);
  return aboveThreshold.slice(0, options.snippetMaxNum || 4);
};
@@ -0,0 +1,16 @@
1
+ // @ts-ignore
2
+ import { Tiktoken, getEncoding } from 'js-tiktoken';
3
+
4
+ import { TokenizerName } from '../types';
5
+
6
// Tokenizer instances keyed by the requested tokenizer name.
const TOKENIZER_CACHE = new Map<TokenizerName, Tiktoken>();

/**
 * Returns a cached tokenizer for `tokenizerName`, creating it on first use.
 *
 * NOTE(review): the encoding is always created as 'cl100k_base', whatever
 * `tokenizerName` is; only the cache key varies. Confirm whether other
 * tokenizer names are meant to map to their own encodings.
 *
 * @param tokenizerName Cache key, `TokenizerName.cl100k_base` by default.
 */
export const getTokenizer = (tokenizerName = TokenizerName.cl100k_base) => {
  const cached = TOKENIZER_CACHE.get(tokenizerName);
  if (cached) {
    return cached;
  }
  const created = getEncoding('cl100k_base');
  TOKENIZER_CACHE.set(tokenizerName, created);
  return created;
};
@@ -1,47 +1,112 @@
1
- import * as constants from './constants';
2
-
3
- export function prePromptHandler(prompt: string): string {
4
- // remove all empty lines
5
- prompt = prompt.replace(/^s*[\n]/gm, '');
6
- const arr = prompt.split('\n');
7
- // if the number of lines is greater than n, take the last n lines
8
- if (arr.length > constants.completionModel.completionPromptMaxLineSize) {
9
- prompt = arr.slice(-constants.completionModel.completionPromptMaxLineSize).join('\n');
10
- }
11
- return prompt;
12
- }
1
+ import { Injector } from '@opensumi/di';
13
2
 
14
- export function preSuffixHandler(suffix: string): string {
15
- suffix = suffix.replace(/^s*[\n]/gm, '');
16
- const arr = suffix.split('\n');
17
- if (arr.length > constants.completionModel.completionSuffixMaxLineSize) {
18
- suffix = arr.slice(-constants.completionModel.completionSuffixMaxLineSize).join('\n');
19
- }
20
- return suffix;
21
- }
3
+ import { getImportedFile } from './prompt/importedFiles';
4
+ import { getAfterPrompt, getBeforePrompt, getLanguageMarker, getPathMarker } from './prompt/prompt';
5
+ import { getSimilarSnippets } from './prompt/similarSnippets';
6
+ import { ICompletionContext, ICompletionModel, MarkerItem, MatchSimilarSnippet } from './types';
22
7
 
23
- export class ReqStack {
24
- queue: any[];
25
- constructor() {
26
- this.queue = [];
27
- }
28
- addReq(reqRequest: { sendRequest: any; cancelRequest: any }) {
29
- this.queue.push(reqRequest);
30
- }
31
- runReq() {
32
- if (this.queue.length === 0) {
33
- return;
34
- }
35
- const fn = this.queue.pop();
36
- return fn.sendRequest();
8
+ import type * as monaco from '@opensumi/ide-monaco';
9
+
10
/**
 * Builds the prompt text that precedes the cursor.
 *
 * The prefix budget is `(1 - suffixPercent) * maxPromptTokenSize`, with
 * `suffixPercent` defaulting to 0.25 when it is not configured. Language
 * marker, path marker, imported files, similar snippets and the text before
 * the cursor are gathered (the latter two lookups are time boxed) and handed
 * to `getBeforePrompt`, which fits them into the budget by priority.
 *
 * @param context Completion context of the file being edited.
 * @param promptConfig Model configuration (budget, time limit, wish list).
 * @param injector Injector forwarded to the lookup helpers.
 * @param token Cancellation token.
 * @returns The assembled prefix prompt; the raw text before the cursor when
 * cancellation was requested or nothing could be assembled.
 */
export async function getPrefixPrompt(
  context: ICompletionContext,
  promptConfig: ICompletionModel,
  injector: Injector,
  token: monaco.CancellationToken,
): Promise<string> {
  const beforeCursor = context.prefix;
  const { maxPromptTokenSize, maxExecuteTimeMillSecond, wishList } = promptConfig;

  // Parenthesized on purpose: `1 - x ?? 0.25` parses as `(1 - x) ?? 0.25`, which
  // yields NaN instead of the default when `suffixPercent` is not configured.
  const prefixPercent = 1 - (wishList.afterCursor.extOption.suffixPercent ?? 0.25);
  const leftTokenSize = Math.ceil(prefixPercent * maxPromptTokenSize);

  // Language Marker
  const languageMarker = wishList.languageMarker.enable ? getLanguageMarker(context.language) : '';
  // Path Marker
  const pathMarker = wishList.pathMarker.enable ? getPathMarker(context.filename, context.language) : '';

  // Similar Snippet
  const similarFileOptions = {
    ...wishList.similarFile.extOption,
    maxTime: Math.min(wishList.similarFile.extOption.maxTime || 200, maxExecuteTimeMillSecond),
  };
  const similarLookupStartedAt = Date.now();
  const similarFileSnippets = wishList.similarFile.enable
    ? await getSimilarSnippets(context, similarFileOptions, injector)
    : [];
  if (token?.isCancellationRequested) {
    return beforeCursor;
  }
  const costTime = Date.now() - similarLookupStartedAt;

  // Imported File: may only use the time the similar-snippet lookup left over.
  const remainingTime = maxExecuteTimeMillSecond - costTime;
  const importedFileOptions = {
    ...wishList.importedFile.extOption,
    // Fall back to the remaining time when no explicit limit is configured
    // (`Math.min(undefined, n)` would be NaN).
    maxTime: Math.min(wishList.importedFile.extOption.maxTime ?? remainingTime, remainingTime),
  };
  const importedFiles: (string | string[])[][] = wishList.importedFile.enable
    ? await getImportedFile(context, importedFileOptions, injector)
    : [];
  if (token?.isCancellationRequested) {
    return beforeCursor;
  }

  // Candidates for the prompt; `getBeforePrompt` orders them by priority.
  const promptPriority: MarkerItem[] = [
    {
      attributeName: wishList.languageMarker.attributeName,
      priority: wishList.languageMarker.priority,
      maxPercent: wishList.languageMarker.extOption.maxPercent ?? 1,
      enable: wishList.languageMarker.enable ?? false,
      content: languageMarker,
    },
    {
      attributeName: wishList.pathMarker.attributeName,
      priority: wishList.pathMarker.priority,
      maxPercent: wishList.pathMarker.extOption?.maxPercent ?? 1,
      enable: wishList.pathMarker.enable ?? false,
      content: pathMarker,
    },
    {
      attributeName: wishList.importedFile.attributeName,
      priority: wishList.importedFile.priority,
      enable: wishList.importedFile.enable ?? false,
      maxPercent: wishList.importedFile.extOption.maxPercent ?? 1,
      importedFiles,
    },
    {
      attributeName: wishList.similarFile.attributeName,
      priority: wishList.similarFile.priority,
      enable: wishList.similarFile.enable ?? false,
      similarSnippets: similarFileSnippets as [string, MatchSimilarSnippet[]][],
      maxPercent: wishList.similarFile.extOption.maxPercent ?? 1,
    },
    {
      attributeName: wishList.beforeCursor.attributeName,
      priority: wishList.beforeCursor.priority,
      maxPercent: wishList.beforeCursor.extOption.maxPercent ?? 1,
      enable: wishList.beforeCursor.enable ?? true,
      content: beforeCursor,
    },
  ];
  return (await getBeforePrompt(promptPriority, context, promptConfig, leftTokenSize, injector, token)) || beforeCursor;
}
104
+
105
/**
 * Builds the prompt text that follows the cursor.
 *
 * @param context Completion context of the file being edited.
 * @param promptConfig Model configuration forwarded to `getAfterPrompt`.
 * @param injector Injector forwarded to `getAfterPrompt`.
 * @param token Cancellation token forwarded to `getAfterPrompt`.
 * @returns The result of `getAfterPrompt`, or an empty string when it is falsy.
 */
export async function getSuffixPrompt(
  context: ICompletionContext,
  promptConfig: ICompletionModel,
  injector: Injector,
  token: monaco.CancellationToken,
): Promise<string> {
  const afterPrompt = await getAfterPrompt(context, promptConfig, injector, token);
  return afterPrompt || '';
}