git-aicommit 5.2.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/autocommit.js CHANGED
@@ -9,11 +9,8 @@ import {
9
9
  SystemMessagePromptTemplate
10
10
  } from "langchain/prompts";
11
11
  import defaultConfig from './config.js';
12
- import {RecursiveCharacterTextSplitter} from "langchain/text_splitter";
13
- import {loadSummarizationChain} from "langchain/chains";
14
12
  import {ChatOpenAI} from "langchain/chat_models/openai";
15
- import {OpenAI} from "langchain/llms/openai";
16
- import fs from "fs";
13
+ import {getModelContextSize} from "./count_tokens.js";
17
14
 
18
15
  const config = rc(
19
16
  'git-aicommit',
@@ -88,8 +85,17 @@ const chatPrompt = ChatPromptTemplate.fromPromptMessages([
88
85
  humanPromptTemplate,
89
86
  ]);
90
87
 
91
- if (diff.length > 2000) {
88
+ const chatMessages = await chatPrompt.formatMessages({
89
+ diff: diff,
90
+ language: config.language,
91
+ });
92
+
93
+ const tokenCount = (await openai.getNumTokensFromMessages(chatMessages)).totalCount
94
+ const contextSize = getModelContextSize(config.modelName)
95
+
96
+ if (tokenCount > contextSize) {
92
97
  console.log('Diff is too long. Splitting into multiple requests.')
98
+ // TODO: split smarter
93
99
  const filenameRegex = /^a\/(.+?)\s+b\/(.+?)/;
94
100
  const diffByFiles = diff
95
101
  .split('diff ' + '--git ') // Weird string concat in order to avoid splitting on this line when using autocommit in this repo :)
@@ -108,7 +114,6 @@ if (diff.length > 2000) {
108
114
  language: config.language,
109
115
  })
110
116
  .then((prompt) => {
111
- console.log(prompt)
112
117
  return openai.call(prompt)
113
118
  .then((res) => {
114
119
  return {
package/config.js CHANGED
@@ -15,15 +15,14 @@ export default {
15
15
  humanPromptTemplate: '' +
16
16
  'Read the following git diff for a multiple files and ' +
17
17
  'write 1-2 sentences commit message in {language}' +
18
- 'without mentioning lines or files:\n' +
18
+ 'without mentioning lines or files.' +
19
+ 'Explain why these changes were made (summarize the reasoning):\n' +
19
20
  '{diff}',
20
21
  excludeFromDiff: [
21
- '*.lock', '*.lockb'
22
+ '*.lock', '*.lockb', '*-lock.json', '*-lock.yaml'
22
23
  ],
23
24
  diffFilter: 'ACMRTUXB',
24
- completionPromptParams: {
25
- model: "gpt-3.5-turbo",
26
- temperature: 0.0,
27
- maxTokens: 1000,
28
- }
25
+ modelName: "gpt-3.5-turbo-16k",
26
+ temperature: 0.0,
27
+ maxTokens: 2000,
29
28
  }
@@ -0,0 +1,78 @@
1
+ // langchain/dist/base_language/count_tokens.js
2
+ export const getModelNameForTiktoken = (modelName) => {
3
+ if (modelName.startsWith("gpt-3.5-turbo-16k")) {
4
+ return "gpt-3.5-turbo-16k";
5
+ }
6
+ if (modelName.startsWith("gpt-3.5-turbo-")) {
7
+ return "gpt-3.5-turbo";
8
+ }
9
+ if (modelName.startsWith("gpt-4-32k-")) {
10
+ return "gpt-4-32k";
11
+ }
12
+ if (modelName.startsWith("gpt-4-")) {
13
+ return "gpt-4";
14
+ }
15
+ return modelName;
16
+ };
17
+ export const getEmbeddingContextSize = (modelName) => {
18
+ switch (modelName) {
19
+ case "text-embedding-ada-002":
20
+ return 8191;
21
+ default:
22
+ return 2046;
23
+ }
24
+ };
25
+ export const getModelContextSize = (modelName) => {
26
+ switch (getModelNameForTiktoken(modelName)) {
27
+ case "gpt-3.5-turbo-16k":
28
+ return 16384;
29
+ case "gpt-3.5-turbo":
30
+ return 4096;
31
+ case "gpt-4-32k":
32
+ return 32768;
33
+ case "gpt-4":
34
+ return 8192;
35
+ case "text-davinci-003":
36
+ return 4097;
37
+ case "text-curie-001":
38
+ return 2048;
39
+ case "text-babbage-001":
40
+ return 2048;
41
+ case "text-ada-001":
42
+ return 2048;
43
+ case "code-davinci-002":
44
+ return 8000;
45
+ case "code-cushman-001":
46
+ return 2048;
47
+ default:
48
+ return 4097;
49
+ }
50
+ };
51
+ export const importTiktoken = async () => {
52
+ try {
53
+ const { encoding_for_model } = await import("@dqbd/tiktoken");
54
+ return { encoding_for_model };
55
+ }
56
+ catch (error) {
57
+ console.log(error);
58
+ return { encoding_for_model: null };
59
+ }
60
+ };
61
+ export const calculateMaxTokens = async ({ prompt, modelName, }) => {
62
+ const { encoding_for_model } = await importTiktoken();
63
+ // fallback to approximate calculation if tiktoken is not available
64
+ let numTokens = Math.ceil(prompt.length / 4);
65
+ try {
66
+ if (encoding_for_model) {
67
+ const encoding = encoding_for_model(getModelNameForTiktoken(modelName));
68
+ const tokenized = encoding.encode(prompt);
69
+ numTokens = tokenized.length;
70
+ encoding.free();
71
+ }
72
+ }
73
+ catch (error) {
74
+ console.warn("Failed to calculate number of tokens with tiktoken, falling back to approximate count", error);
75
+ }
76
+ const maxTokens = getModelContextSize(modelName);
77
+ return maxTokens - numTokens;
78
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "git-aicommit",
3
- "version": "5.2.0",
3
+ "version": "6.0.0",
4
4
  "description": "Generates auto commit messages with OpenAI GPT3 model",
5
5
  "main": "autocommit.js",
6
6
  "repository": "https://github.com/shanginn/autocommit",
@@ -8,10 +8,10 @@
8
8
  "license": "MIT",
9
9
  "type": "module",
10
10
  "dependencies": {
11
- "@dqbd/tiktoken": "^1.0.7",
12
11
  "langchain": "^0.0.75",
13
- "openai": "^3.2.1",
14
- "rc": "^1.2.8"
12
+ "openai": "^3.3.0",
13
+ "rc": "^1.2.8",
14
+ "tiktoken": "^1.0.8"
15
15
  },
16
16
  "preferGlobal": true,
17
17
  "bin": {