@jpoly1219/context-extractor 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -70,7 +70,7 @@ dune build
70
70
 
71
71
  Ignore the wildcard build errors. The command is meant to set up the modules and imports.
72
72
 
73
- Almost there! Create a `credentials.json` file following the steps at the **credentials.json** section below in the README.
73
+ Almost there! Create a `config.json` file following the steps at the **config.json** section below in the README.
74
74
 
75
75
  Finally, build and run.
76
76
 
@@ -88,16 +88,21 @@ node dist/runner.js
88
88
  3. Extract relevant headers.
89
89
  4. Optionally complete the hole with an LLM.
90
90
 
91
- This library exposes the method `extractContext`, which has the following definition:
91
+ This library exposes two methods, `extractContext` and `completeWithLLM`, which have the following definitions:
92
92
 
93
93
  ```ts
94
94
  const extractContext = async (
95
95
  language: Language,
96
96
  sketchPath: string,
97
97
  repoPath: string,
98
- credentialsPath: string,
99
- getCompletion: boolean
100
- ): Promise<{ context: Context | null, completion: string | null }
98
+ ): Promise<Context | null>;
99
+
100
+ const completeWithLLM = async (
101
+ ctx: Context,
102
+ language: Language,
103
+ sketchPath: string,
104
+ configPath: string
105
+ ): Promise<string>;
101
106
 
102
107
  enum Language {
103
108
  TypeScript,
@@ -105,22 +110,26 @@ enum Language {
105
110
  }
106
111
 
107
112
  interface Context {
108
- hole: string,
109
- relevantTypes: Map<string, string[]>,
110
- relevantHeaders: Map<string, string[]>
113
+ holeType: string,
114
+ relevantTypes: Map<Filepath, RelevantType[]>,
115
+ relevantHeaders: Map<Filepath, RelevantHeader[]>
111
116
  }
117
+
118
+ type Filepath = string;
119
+ type RelevantType = string;
120
+ type RelevantHeader = string;
112
121
  ```
113
122
 
114
- - `sketchPath` is the full path to your sketch file with the typed hole construct (`_()` for TypeScript, `_` for OCaml).
123
+ - `sketchPath` is the full path to your sketch file with the typed hole construct (`_()` for TypeScript, `_` for OCaml). This is NOT prefixed with `file://`.
115
124
  - `repoPath` is the full path to your repository root.
116
- - `credentialsPath` is the full path to your `credentials.json`.
117
- - `getCompletion` is a flag to set if you want the LLM to complete the typed hole. This completion is saved in the `completion` field of the return result.
118
- - `null` values will only be set if something goes wrong internally. When `getCompletion` is set to false, the `completion` field's value will be an empty string.
125
+ - `configPath` is the full path to your `config.json`.
126
+ - `extractContext` resolves to `null` only if something goes wrong internally.
127
+ - `ctx` is the result from `extractContext`.
119
128
 
120
- ### credentials.json
129
+ ### config.json
121
130
 
122
131
  The extractor calls OpenAI for code completion.
123
- For this you need a `credentials.json` file that holds your specific OpenAI parameters.
132
+ For this you need a `config.json` file that holds your specific OpenAI parameters.
124
133
 
125
134
  The json has the following format:
126
135
 
@@ -137,7 +146,7 @@ The json has the following format:
137
146
  }
138
147
  ```
139
148
 
140
- Internally, this is how fields above are populated when creating a new OpenAI client.
149
+ Internally, this is how the credentials are populated when creating a new OpenAI client.
141
150
 
142
151
  ```ts
143
152
  const openai = new OpenAI({
package/dist/app.d.ts ADDED
@@ -0,0 +1,27 @@
1
+ import { Language, Context } from "./types";
2
+ export declare class App { // Runs one context extraction against a spawned language server (implementation: dist/app.js).
3
+ private language; // Language value passed to the constructor; selects the driver and the post-processing in run().
4
+ private languageDriver; // TypeScriptDriver or OcamlDriver, chosen in the constructor from `language`.
5
+ private languageServer; // Child process of the spawned language server (typescript-language-server or ocamllsp).
6
+ private lspClient; // LspClient built on a JSONRPCEndpoint over the server's stdin/stdout.
7
+ private sketchPath; // Path of the sketch file that contains the typed hole.
8
+ private repoPath; // Repository root; run() collects the source files to scan from here.
9
+ private result; // null until run() finishes successfully, then the extracted Context.
10
+ private timeout; // Timer armed in the constructor: SIGKILLs the server after 5000 ms unless the server's 'exit' event clears it.
11
+ constructor(language: Language, sketchPath: string, repoPath: string);
12
+ init(): Promise<void>; // Delegates to languageDriver.init(lspClient, sketchPath). run() already calls this itself.
13
+ run(): Promise<void>; // Extracts hole type, relevant types and relevant headers into `result`; logs and rethrows on failure.
14
+ close(): void; // Calls lspClient.exit(); an exception from that call is logged, not rethrown.
15
+ getSavedResult(): Context | null; // Returns `result` as-is (null if run() has not succeeded).
16
+ }
17
+ export declare class CompletionEngine { // Builds a prompt from an extracted Context and asks an OpenAI chat model to fill the hole (implementation: dist/app.js).
18
+ private language; // Language value passed to the constructor; used when building the prompt.
19
+ private config; // Parsed JSON content of the file at configPath (read synchronously in the constructor).
20
+ private sketchPath; // Path of the sketch file; its content is read when completeWithLLM() runs.
21
+ constructor(language: Language, sketchPath: string, configPath: string);
22
+ completeWithLLM(context: Context): Promise<string>; // Resolves to the content of the first choice returned by the chat completion call.
23
+ generateTypesAndHeadersPrompt(sketchFileContent: string, holeType: string, relevantTypes: string, relevantHeaders: string): { // Returns a system message followed by one user message.
24
+ role: string;
25
+ content: string;
26
+ }[];
27
+ }
package/dist/app.js ADDED
@@ -0,0 +1,284 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Compiler-emitted helper: re-exposes m[k] on o as k2 (defaults to k).
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } }; // Fall back to a live getter so later changes to m[k] stay visible.
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // Variant used when Object.create is unavailable: plain copy of the current value.
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // Compiler-emitted helper: installs v as o.default.
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || function (mod) { // Compiler-emitted helper behind `import * as x`: returns ES modules unchanged, otherwise builds a namespace object.
19
+ if (mod && mod.__esModule) return mod;
20
+ var result = {};
21
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); // Own properties only; "default" is set separately below.
22
+ __setModuleDefault(result, mod);
23
+ return result;
24
+ };
25
+ var __importDefault = (this && this.__importDefault) || function (mod) { // Compiler-emitted helper behind default imports: wraps non-ES modules as { default: mod }.
26
+ return (mod && mod.__esModule) ? mod : { "default": mod };
27
+ };
28
+ Object.defineProperty(exports, "__esModule", { value: true });
29
+ exports.CompletionEngine = exports.App = void 0;
30
+ const path = __importStar(require("path"));
31
+ const fs = __importStar(require("fs"));
32
+ const child_process_1 = require("child_process");
33
+ const child_process_2 = require("child_process");
34
+ const openai_1 = __importDefault(require("openai"));
35
+ const main_1 = require("../ts-lsp-client-dist/src/main");
36
+ const types_1 = require("./types");
37
+ // TODO: Bundle the drivers as barrel exports.
38
+ const typescript_driver_1 = require("./typescript-driver");
39
+ const ocaml_driver_1 = require("./ocaml-driver");
40
+ const utils_1 = require("./utils");
41
+ class App {
42
+ constructor(language, sketchPath, repoPath) {
43
+ // private result: {
44
+ // hole: string;
45
+ // relevantTypes: string[];
46
+ // relevantHeaders: string[];
47
+ // } | null = null;
48
+ this.result = null;
49
+ // Optional timeout for forced termination
50
+ this.timeout = setTimeout(() => {
51
+ if (!this.languageServer.killed) {
52
+ console.log('Forcibly killing the process...');
53
+ this.languageServer.kill('SIGKILL');
54
+ }
55
+ }, 5000);
56
+ this.language = language;
57
+ this.sketchPath = sketchPath;
58
+ this.repoPath = repoPath;
59
+ const r = (() => {
60
+ switch (language) {
61
+ case types_1.Language.TypeScript: {
62
+ this.languageDriver = new typescript_driver_1.TypeScriptDriver();
63
+ return (0, child_process_1.spawn)("typescript-language-server", ["--stdio"], { stdio: ["pipe", "pipe", "pipe"] });
64
+ }
65
+ case types_1.Language.OCaml: {
66
+ this.languageDriver = new ocaml_driver_1.OcamlDriver();
67
+ try {
68
+ (0, child_process_2.execSync)(`eval $(opam env --switch=. --set-switch)`, { shell: "/bin/bash" });
69
+ // execSync("opam switch .", { shell: "/bin/bash" })
70
+ const currDir = __dirname;
71
+ process.chdir(path.dirname(sketchPath));
72
+ // execSync("which dune", { shell: "/bin/bash" })
73
+ (0, child_process_1.spawn)("dune", ["build", "-w"]);
74
+ process.chdir(currDir);
75
+ }
76
+ catch (err) {
77
+ console.log("ERROR:", err);
78
+ }
79
+ // TODO: Spawn a dune build -w on sketch directory.
80
+ // try {
81
+ // execSync("which dune", { shell: "/bin/bash" })
82
+ // spawn("dune", ["build", "-w"]);
83
+ // } catch (err) {
84
+ // console.log("ERROR:", err)
85
+ // }
86
+ // process.chdir(currDir);
87
+ return (0, child_process_1.spawn)("ocamllsp", ["--stdio"]);
88
+ }
89
+ }
90
+ })();
91
+ const e = new main_1.JSONRPCEndpoint(r.stdin, r.stdout);
92
+ const c = new main_1.LspClient(e);
93
+ this.languageServer = r;
94
+ this.lspClient = c;
95
+ this.languageServer.on('close', (code) => {
96
+ if (code !== 0) {
97
+ console.log(`ls process exited with code ${code}`);
98
+ }
99
+ });
100
+ // Clear timeout once the process exits
101
+ this.languageServer.on('exit', () => {
102
+ clearTimeout(this.timeout);
103
+ console.log('Process terminated cleanly.');
104
+ });
105
+ // const logFile = fs.createWriteStream("log.txt");
106
+ // r.stdout.on('data', (d) => logFile.write(d));
107
+ }
108
+ async init() {
109
+ await this.languageDriver.init(this.lspClient, this.sketchPath);
110
+ }
111
+ async run() {
112
+ // const outputFile = fs.createWriteStream("output.txt");
113
+ try {
114
+ await this.init();
115
+ const holeContext = await this.languageDriver.getHoleContext(this.lspClient, this.sketchPath);
116
+ const relevantTypes = await this.languageDriver.extractRelevantTypes(this.lspClient, holeContext.fullHoverResult, holeContext.functionName, holeContext.range.start.line, holeContext.range.end.line, new Map(), holeContext.source);
117
+ // Postprocess the map.
118
+ if (this.language === types_1.Language.TypeScript) {
119
+ relevantTypes.delete("_()");
120
+ for (const [k, { typeSpan: v, sourceFile: src }] of relevantTypes.entries()) {
121
+ relevantTypes.set(k, { typeSpan: v.slice(0, -1), sourceFile: src });
122
+ }
123
+ }
124
+ else if (this.language === types_1.Language.OCaml) {
125
+ relevantTypes.delete("_");
126
+ }
127
+ console.log(path.join(path.dirname(this.sketchPath), `sketch${path.extname(this.sketchPath)}`));
128
+ let repo = [];
129
+ if (this.language === types_1.Language.TypeScript) {
130
+ repo = (0, utils_1.getAllTSFiles)(this.repoPath);
131
+ }
132
+ else if (this.language === types_1.Language.OCaml) {
133
+ repo = (0, utils_1.getAllOCamlFiles)(this.repoPath);
134
+ }
135
+ const relevantHeaders = await this.languageDriver.extractRelevantHeaders(this.lspClient, repo, relevantTypes, holeContext.functionTypeSpan);
136
+ // Postprocess the map.
137
+ if (this.language === types_1.Language.TypeScript) {
138
+ relevantTypes.delete("");
139
+ for (const [k, { typeSpan: v, sourceFile: src }] of relevantTypes.entries()) {
140
+ relevantTypes.set(k, { typeSpan: v + ";", sourceFile: src });
141
+ }
142
+ for (const obj of relevantHeaders) {
143
+ obj.typeSpan += ";";
144
+ }
145
+ }
146
+ const relevantTypesToReturn = new Map();
147
+ relevantTypes.forEach(({ typeSpan: v, sourceFile: src }, _) => {
148
+ if (relevantTypesToReturn.has(src)) {
149
+ const updated = relevantTypesToReturn.get(src);
150
+ updated.push(v);
151
+ relevantTypesToReturn.set(src, updated);
152
+ }
153
+ else {
154
+ relevantTypesToReturn.set(src, [v]);
155
+ }
156
+ });
157
+ const relevantHeadersToReturn = new Map();
158
+ relevantHeaders.forEach(({ typeSpan: v, sourceFile: src }) => {
159
+ if (relevantHeadersToReturn.has(src)) {
160
+ const updated = relevantHeadersToReturn.get(src);
161
+ if (!updated.includes(v)) {
162
+ updated.push(v);
163
+ }
164
+ relevantHeadersToReturn.set(src, updated);
165
+ }
166
+ else {
167
+ relevantHeadersToReturn.set(src, [v]);
168
+ }
169
+ });
170
+ this.result = {
171
+ holeType: holeContext.functionTypeSpan,
172
+ relevantTypes: relevantTypesToReturn,
173
+ relevantHeaders: relevantHeadersToReturn
174
+ };
175
+ }
176
+ catch (err) {
177
+ console.error("Error during execution:", err);
178
+ throw err;
179
+ }
180
+ finally {
181
+ // outputFile.end();
182
+ }
183
+ }
184
+ close() {
185
+ // TODO:
186
+ try {
187
+ this.lspClient.exit();
188
+ }
189
+ catch (err) {
190
+ console.log(err);
191
+ }
192
+ }
193
+ getSavedResult() {
194
+ return this.result;
195
+ }
196
+ }
197
+ exports.App = App;
198
+ class CompletionEngine {
199
+ constructor(language, sketchPath, configPath) {
200
+ this.language = language;
201
+ this.config = JSON.parse(fs.readFileSync(configPath, "utf8"));
202
+ this.sketchPath = sketchPath;
203
+ }
204
+ async completeWithLLM(context) {
205
+ let joinedTypes = "";
206
+ let joinedHeaders = "";
207
+ context.relevantTypes.forEach((v, _) => {
208
+ joinedTypes = joinedTypes + v.join("\n") + "\n";
209
+ });
210
+ context.relevantHeaders.forEach((v, _) => {
211
+ joinedHeaders = joinedHeaders + v.join("\n") + "\n";
212
+ });
213
+ // Create a prompt.
214
+ const prompt = this.generateTypesAndHeadersPrompt(
215
+ // fs.readFileSync(path.join(targetDirectoryPath, "sketch.ts"), "utf8"),
216
+ fs.readFileSync(this.sketchPath, "utf8"), context.holeType, joinedTypes, joinedHeaders);
217
+ // Call the LLM to get completion results back.
218
+ const apiBase = this.config.apiBase;
219
+ const deployment = this.config.deployment;
220
+ const model = this.config.gptModel;
221
+ const apiVersion = this.config.apiVersion;
222
+ const apiKey = this.config.apiKey;
223
+ const openai = new openai_1.default({
224
+ apiKey,
225
+ baseURL: `${apiBase}/openai/deployments/${deployment}`,
226
+ defaultQuery: { "api-version": apiVersion },
227
+ defaultHeaders: { "api-key": apiKey }
228
+ });
229
+ const llmResult = await openai.chat.completions.create({
230
+ model,
231
+ messages: prompt,
232
+ temperature: this.config.temperature
233
+ });
234
+ return llmResult.choices[0].message.content;
235
+ }
236
+ generateTypesAndHeadersPrompt(sketchFileContent, holeType, relevantTypes, relevantHeaders) {
237
+ let holeConstruct = "";
238
+ switch (this.language) {
239
+ case types_1.Language.TypeScript: {
240
+ holeConstruct = "_()";
241
+ }
242
+ case types_1.Language.OCaml: {
243
+ holeConstruct = "_";
244
+ }
245
+ }
246
+ const prompt = [{
247
+ role: "system",
248
+ content: [
249
+ "CODE COMPLETION INSTRUCTIONS:",
250
+ `- Reply with a functional, idiomatic replacement for the program hole marked '${holeConstruct}' in the provided TypeScript program sketch`,
251
+ `- Reply only with a single replacement term for the unqiue distinguished hole marked '${holeConstruct}'`,
252
+ "Reply only with code",
253
+ "- DO NOT include the program sketch in your reply",
254
+ "- DO NOT include a period at the end of your response and DO NOT use markdown",
255
+ "- DO NOT include a type signature for the program hole, as this is redundant and is already in the provided program sketch"
256
+ ].join("\n"),
257
+ }];
258
+ let userPrompt = {
259
+ role: "user",
260
+ content: ""
261
+ };
262
+ if (relevantTypes) {
263
+ userPrompt.content +=
264
+ `# The expected type of the goal completion is ${holeType} #
265
+
266
+ # The following type definitions are likely relevant: #
267
+ ${relevantTypes}
268
+
269
+ `;
270
+ }
271
+ if (relevantHeaders) {
272
+ userPrompt.content += `
273
+ # Consider using these variables relevant to the expected type: #
274
+ ${relevantHeaders}
275
+
276
+ `;
277
+ }
278
+ userPrompt.content += `# Program Sketch to be completed: #\n${(0, utils_1.removeLines)(sketchFileContent).join("\n")}`;
279
+ prompt.push(userPrompt);
280
+ return prompt;
281
+ }
282
+ ;
283
+ }
284
+ exports.CompletionEngine = CompletionEngine;
@@ -0,0 +1,17 @@
1
+ import { relevantTypeObject, varsObject, typesObject } from "./types";
2
+ declare const createDatabaseWithCodeQL: (pathToCodeQL: string, targetPath: string) => string; // Signature only; presumably creates a CodeQL database for targetPath and returns its location (TODO confirm against the implementation).
3
+ declare const extractHoleType: (pathToCodeQL: string, pathToQuery: string, pathToDatabase: string, outDir: string) => typesObject; // Signature only; presumably runs the query at pathToQuery against the database and writes into outDir (TODO confirm).
4
+ declare const extractRelevantTypesWithCodeQL: (pathToCodeQL: string, pathToQuery: string, pathToDatabase: string, outDir: string) => Map<string, relevantTypeObject>; // Returns a map keyed by string; the key's meaning is not visible here.
5
+ declare const extractHeadersWithCodeQL: (pathToCodeQL: string, pathToQuery: string, pathToDatabase: string, outDir: string) => Map<string, varsObject>; // Returns a map keyed by string; the key's meaning is not visible here.
6
+ declare const extractTypesAndLocations: (pathToCodeQL: string, pathToQuery: string, pathToDatabase: string, outDir: string) => { // Returns two lookup maps, one per direction.
7
+ locationToType: Map<string, string[]>;
8
+ typeToLocation: Map<string, string>;
9
+ };
10
+ declare const extractRelevantContextWithCodeQL: (pathToCodeQL: string, pathToQuery: string, pathToDatabase: string, outDir: string, headers: Map<string, varsObject>, relevantTypes: Map<string, relevantTypeObject>) => Set<string>; // Takes the outputs of the header and relevant-type extractors; returns a set of strings.
11
+ declare const getRelevantHeaders: (pathToCodeQL: string, pathToQuery: string, pathToDatabase: string, outDir: string, headers: Map<string, varsObject>, holeType: typesObject) => Set<string>; // Base variant: headers plus hole type.
12
+ declare const getRelevantHeaders3: (pathToCodeQL: string, pathToQuery: string, pathToDatabase: string, outDir: string, headers: Map<string, varsObject>, holeType: typesObject, relevantTypes: Map<string, relevantTypeObject>) => Set<string>; // Same inputs as getRelevantHeaders plus relevantTypes.
13
+ declare const getRelevantHeaders4: (pathToCodeQL: string, pathToQueryDir: string, pathToDatabase: string, outDir: string, headers: Map<string, varsObject>, holeType: typesObject, relevantTypes: Map<string, relevantTypeObject>, knownTypeLocations: { // Takes a query directory (not a single query) and the shape returned by extractTypesAndLocations.
14
+ locationToType: Map<string, string[]>;
15
+ typeToLocation: Map<string, string>;
16
+ }) => Set<string>;
17
+ export { createDatabaseWithCodeQL, extractHoleType, extractRelevantTypesWithCodeQL, extractHeadersWithCodeQL, extractRelevantContextWithCodeQL, extractTypesAndLocations, getRelevantHeaders, getRelevantHeaders3, getRelevantHeaders4 };