git-coco 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,14 @@
1
1
  # `coco` 🤖 🦍
2
2
 
3
- Commit Copilot, or `coco`, is your personal scribe for git commit messages. Using [LangChain🦜🔗](https://js.langchain.com/) to automate the task of creating meaningful commit messages based on your staged changes!
3
+ [![GitHub issues](https://img.shields.io/github/issues/gfargo/coco)](https://github.com/gfargo/coco/issues)
4
+ [![GitHub pull requests](https://img.shields.io/github/issues-pr/gfargo/coco)](https://github.com/gfargo/coco/pulls)
5
+ [![Last Commit](https://img.shields.io/github/last-commit/gfargo/coco)](https://github.com/gfargo/coco/tree/main)
6
+ [![NPM Version](https://img.shields.io/npm/v/git-coco.svg)](https://www.npmjs.com/package/git-coco)
7
+ [![NPM Downloads](https://img.shields.io/npm/dt/git-coco.svg)](https://www.npmjs.com/package/git-coco)
8
+
9
+ Commit Copilot, or `coco`, is your personal scribe for git commit messages. Leveraging the power of [LangChain🦜🔗](https://js.langchain.com/) and LLMs to encapsulate your staged changes into meaningful commit messages!
10
+
11
+
4
12
 
5
13
  ## Installation
6
14
 
@@ -105,7 +113,7 @@ Remember, command line flags and environment variables should be defined in `UPP
105
113
  - [x] LangChain integration 🦜
106
114
  - [ ] Additional tests! 🧪
107
115
  - [ ] Conventional commits 🔜
108
- - [ ] HuggingFace integration 🔜
116
+ - [x] HuggingFace integration 🔜
109
117
  - [ ] Google Vertex AI integration (?)
110
118
  - [ ] Automatic changelog generation 🫣
111
119
  - [ ] Rebase support 🔀
@@ -14,11 +14,13 @@ import ora from 'ora';
14
14
  import now from 'performance-now';
15
15
  import prettyMilliseconds from 'pretty-ms';
16
16
  import { Document } from 'langchain/document';
17
+ import { HuggingFaceInference } from 'langchain/llms/hf';
17
18
  import { loadSummarizationChain, LLMChain } from 'langchain/chains';
18
19
  import { OpenAI } from 'langchain/llms/openai';
19
20
  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
20
21
  import { createTwoFilesPatch } from 'diff';
21
22
  import GPT3NodeTokenizer from 'gpt3-tokenizer';
23
+ import { minimatch } from 'minimatch';
22
24
  import { simpleGit } from 'simple-git';
23
25
 
24
26
  /**
@@ -39,7 +41,9 @@ function removeUndefined(obj) {
39
41
  **/
40
42
  function loadEnvConfig(config) {
41
43
  const envConfig = {
44
+ model: process.env.COCO_MODEL || undefined,
42
45
  openAIApiKey: process.env.OPENAI_API_KEY || undefined,
46
+ huggingFaceHubApiKey: process.env.HUGGINGFACE_HUB_API_KEY || undefined,
43
47
  tokenLimit: process.env.COCO_TOKEN_LIMIT
44
48
  ? parseInt(process.env.COCO_TOKEN_LIMIT)
45
49
  : undefined,
@@ -70,7 +74,9 @@ function loadGitConfig(config) {
70
74
  const gitConfigParsed = ini.parse(gitConfigRaw);
71
75
  config = {
72
76
  ...config,
77
+ model: gitConfigParsed.coco?.model || config.model,
73
78
  openAIApiKey: gitConfigParsed.coco?.openAIApiKey || config.openAIApiKey,
79
+ huggingFaceHubApiKey: gitConfigParsed.coco?.huggingFaceHubApiKey || config.huggingFaceHubApiKey,
74
80
  tokenLimit: parseInt(gitConfigParsed.coco?.tokenLimit) || config.tokenLimit,
75
81
  prompt: gitConfigParsed.coco?.prompt || config.prompt,
76
82
  mode: gitConfigParsed.coco?.mode || config.mode,
@@ -150,7 +156,9 @@ function loadXDGConfig(config) {
150
156
  * Command line options via yargs
151
157
  */
152
158
  const options = {
159
+ model: { type: 'string', description: 'LLM/Model-Name' },
153
160
  openAIApiKey: { type: 'string', description: 'OpenAI API Key' },
161
+ huggingFaceHubApiKey: { type: 'string', description: 'HuggingFace Hub API Key' },
154
162
  tokenLimit: { type: 'number', description: 'Token limit' },
155
163
  prompt: {
156
164
  type: 'string',
@@ -247,7 +255,7 @@ const SUMMARIZE_PROMPT = new PromptTemplate({
247
255
  * @type {Config}
248
256
  */
249
257
  const DEFAULT_CONFIG = {
250
- openAIApiKey: '',
258
+ model: 'openai/gpt-3.5-turbo',
251
259
  verbose: false,
252
260
  tokenLimit: 1024,
253
261
  prompt: COMMIT_PROMPT.template,
@@ -257,7 +265,6 @@ const DEFAULT_CONFIG = {
257
265
  ignoredFiles: ['package-lock.json'],
258
266
  ignoredExtensions: ['.map', '.lock'],
259
267
  };
260
-
261
268
  /**
262
269
  * Load application config
263
270
  *
@@ -541,18 +548,34 @@ async function collectDiffs(node, getFileDiff, tokenizer, logger = new Logger(co
541
548
  };
542
549
  }
543
550
 
544
- // TODO: Extend this to support other models! 🎉
551
+ /**
552
+ * Get LLM Model Based on Configuration
553
+ *
554
+ * @param fields
555
+ * @param configuration
556
+ * @returns LLM Model
557
+ */
545
558
  function getModel(fields, configuration) {
546
- return new OpenAI(fields, configuration);
547
- // return new HuggingFaceInference({
548
- // // model: 'gpt2',
549
- // // model: 'bigcode/starcoder',
550
- // model: 'bigscience/bloom',
551
- // apiKey: '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',
552
- // maxConcurrency: 4,
553
- // cache: true,
554
- // // maxTokens: 2046,
555
- // })
559
+ const [llm, model] = config.model.split(/\/(.*)/s);
560
+ if (!model) {
561
+ throw new Error(`Invalid model: ${config.model}`);
562
+ }
563
+ switch (llm) {
564
+ case 'huggingface':
565
+ return new HuggingFaceInference({
566
+ model: model,
567
+ apiKey: config.huggingFaceHubApiKey,
568
+ maxConcurrency: 4,
569
+ ...fields,
570
+ });
571
+ case 'openai':
572
+ default:
573
+ return new OpenAI({
574
+ openAIApiKey: config.openAIApiKey,
575
+ modelName: model,
576
+ ...fields,
577
+ }, configuration);
578
+ }
556
579
  }
557
580
  function getTextSplitter(options = {}) {
558
581
  return new RecursiveCharacterTextSplitter(options);
@@ -589,14 +612,11 @@ const parseRenamedFileDiff = async (nodeFile, git, logger) => {
589
612
  const oldFilepath = nodeFile?.oldFilepath || nodeFile.filepath;
590
613
  try {
591
614
  const [headContent, indexContent] = await Promise.all([
592
- // git.diff(['HEAD', '-M', '--', oldFilepath]),
593
- // git.diff(['-z', '-M', '--staged', nodeFile.filepath]),
594
615
  git.show([`HEAD:${oldFilepath}`]),
595
616
  git.show([`:${nodeFile.filepath}`]),
596
- // readFile(nodeFile.filepath),
597
617
  ]);
598
618
  if (headContent !== indexContent) {
599
- result = createTwoFilesPatch(oldFilepath, nodeFile.filepath, headContent, indexContent.toString(), '', '', {
619
+ result = createTwoFilesPatch(oldFilepath, nodeFile.filepath, headContent, indexContent, '', '', {
600
620
  context: 3,
601
621
  });
602
622
  // remove the first 4 lines of the patch (they contain the old and new file names)
@@ -608,7 +628,6 @@ const parseRenamedFileDiff = async (nodeFile, git, logger) => {
608
628
  }
609
629
  catch (err) {
610
630
  logger.verbose(`Error comparing file contents for ${nodeFile.filepath}`, { color: 'red' });
611
- console.log(err);
612
631
  result = 'Error comparing file contents.';
613
632
  }
614
633
  return result;
@@ -651,7 +670,6 @@ const fileChangeParser = async (changes, { tokenizer, git, model }) => {
651
670
  chain: summarizationChain,
652
671
  });
653
672
  logger.stopTimer(`\nSummary generated for ${changes.length} staged files`, { color: 'green' });
654
- logger.verbose(`\nSummary:\n${summary}`, { color: 'blue' });
655
673
  return summary;
656
674
  };
657
675
 
@@ -686,10 +704,25 @@ const getTokenizer = () => {
686
704
  };
687
705
 
688
706
  const llm = async ({ llm, prompt, variables }) => {
707
+ if (!llm || !prompt || !variables) {
708
+ throw new Error('The input parameters "llm", "prompt", and "variables" are all required.');
709
+ }
689
710
  const chain = new LLMChain({ llm, prompt });
690
- const res = await chain.call(variables);
691
- if (res.error)
692
- throw new Error(res.error);
711
+ let res;
712
+ try {
713
+ res = await chain.call(variables);
714
+ }
715
+ catch (error) {
716
+ if (error instanceof Error) {
717
+ throw new Error(`LLMChain call error: ${error.message}`);
718
+ }
719
+ }
720
+ if (!res) {
721
+ throw new Error('Empty response from LLMChain call');
722
+ }
723
+ if (res.error) {
724
+ throw new Error(`LLMChain response error: ${res.error}`);
725
+ }
693
726
  return res.text.trim();
694
727
  };
695
728
 
@@ -727,14 +760,8 @@ const getSummaryText = (file, change) => {
727
760
  return `${status}: ${file.path}`;
728
761
  };
729
762
 
730
- const DEFAULT_IGNORED_FILES = [
731
- ...(config?.ignoredFiles?.length && config?.ignoredFiles?.length > 0 ? config.ignoredFiles : []),
732
- ];
733
- const DEFAULT_IGNORED_EXTENSIONS = [
734
- ...(config?.ignoredExtensions?.length && config?.ignoredExtensions?.length > 0
735
- ? config.ignoredExtensions
736
- : []),
737
- ];
763
+ const DEFAULT_IGNORED_FILES = config?.ignoredFiles?.length ? config.ignoredFiles : [];
764
+ const DEFAULT_IGNORED_EXTENSIONS = config?.ignoredExtensions?.length ? config.ignoredExtensions : [];
738
765
  async function getChanges(git, options = {}) {
739
766
  const { ignoredFiles = DEFAULT_IGNORED_FILES, ignoredExtensions = DEFAULT_IGNORED_EXTENSIONS } = options;
740
767
  const staged = [];
@@ -742,7 +769,6 @@ async function getChanges(git, options = {}) {
742
769
  const untracked = [];
743
770
  const status = await git.status();
744
771
  status.files.forEach((file) => {
745
- // console.log({ file })
746
772
  const fileChange = {
747
773
  filepath: file.path,
748
774
  oldFilepath: status.renamed.filter((renamed) => renamed.to === file.path)[0]?.from,
@@ -769,16 +795,20 @@ async function getChanges(git, options = {}) {
769
795
  const ignoredExtensionsSet = new Set(ignoredExtensions.map((extension) => extension.toLowerCase()));
770
796
  const filteredStaged = staged.filter((file) => {
771
797
  const extension = path__default.extname(file.filepath).toLowerCase();
772
- return !ignoredExtensionsSet.has(extension) && !ignoredFiles.includes(file.filepath);
798
+ return !ignoredExtensionsSet.has(extension) && !ignoredFiles.some(ignoredPattern => minimatch(file.filepath, ignoredPattern));
773
799
  });
774
800
  const filteredUnstaged = unstaged.filter((file) => {
775
801
  const extension = path__default.extname(file.filepath).toLowerCase();
776
- return !ignoredExtensionsSet.has(extension) && !ignoredFiles.includes(file.filepath);
802
+ return !ignoredExtensionsSet.has(extension) && !ignoredFiles.some(ignoredPattern => minimatch(file.filepath, ignoredPattern));
803
+ });
804
+ const filteredUntracked = untracked.filter((file) => {
805
+ const extension = path__default.extname(file.filepath).toLowerCase();
806
+ return !ignoredExtensionsSet.has(extension) && !ignoredFiles.some(ignoredPattern => minimatch(file.filepath, ignoredPattern));
777
807
  });
778
808
  return {
779
809
  staged: filteredStaged,
780
810
  unstaged: filteredUnstaged,
781
- untracked,
811
+ untracked: filteredUntracked,
782
812
  };
783
813
  }
784
814