repominer-GDeLuisi 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repominer_gdeluisi-0.1.0.dist-info/METADATA +19 -0
- repominer_gdeluisi-0.1.0.dist-info/RECORD +12 -0
- repominer_gdeluisi-0.1.0.dist-info/WHEEL +4 -0
- repominer_gdeluisi-0.1.0.dist-info/licenses/LICENSE +21 -0
- src/repository_miner/__init__.py +6 -0
- src/repository_miner/data_typing.py +106 -0
- src/repository_miner/exceptions.py +15 -0
- src/repository_miner/git.py +31 -0
- src/repository_miner/helper.py +156 -0
- src/repository_miner/miner.py +98 -0
- src/repository_miner/py.typed +0 -0
- src/repository_miner/utility.py +40 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: repominer-GDeLuisi
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Tool for mining git managed repositories
|
5
|
+
Author: Gerardo De Luisi
|
6
|
+
License-File: LICENSE
|
7
|
+
Keywords: git,miner,projects
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
9
|
+
Classifier: Operating System :: OS Independent
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
14
|
+
Requires-Python: >=3.10
|
15
|
+
Description-Content-Type: text/markdown
|
16
|
+
|
17
|
+
# repository_miner
|
18
|
+
A Python library for mining git repositories.
|
19
|
+
Requires git >= 2.40 to be installed and available on PATH.
|
@@ -0,0 +1,12 @@
|
|
1
|
+
src/repository_miner/__init__.py,sha256=sYtS1nL_0XlvcmJrIxE7TQwfcO1f2HDcs1rvFWt-aww,115
|
2
|
+
src/repository_miner/data_typing.py,sha256=9mZcFk0s-7CrrGcvKxUqNiORkNk8x_sn0Vjc8PxwRqw,2964
|
3
|
+
src/repository_miner/exceptions.py,sha256=xFKI_ra-6FW_Ux3vF-xVM2XRFowAX0FRXzFRLTuTZUg,411
|
4
|
+
src/repository_miner/git.py,sha256=UzbDWLbRGxcwMT8tyPSn8U0UOGoHgJvDWV0-BcBnmR8,1293
|
5
|
+
src/repository_miner/helper.py,sha256=dLVoA-xW1kQoT7VPYy4kyj1FDmkOdElZJPs_S3Ev51c,5829
|
6
|
+
src/repository_miner/miner.py,sha256=GTGJHL-3KF9VamFnlPy0RQvaCyiijxsDlzb-fJZ9I4Y,5035
|
7
|
+
src/repository_miner/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
src/repository_miner/utility.py,sha256=y6rgmmitk4UuZI8AthuNa716UfJSjOL9kZaJ-d7Zc8I,1275
|
9
|
+
repominer_gdeluisi-0.1.0.dist-info/METADATA,sha256=zLcpyckfl5WolpDm0e3ccyfj2ELAgy-Pk1NTl4FQjcU,646
|
10
|
+
repominer_gdeluisi-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
11
|
+
repominer_gdeluisi-0.1.0.dist-info/licenses/LICENSE,sha256=rPNAmE1lTJ0R9WVW3rDw85fHnHGXR_AbHj_aSI7njO8,1064
|
12
|
+
repominer_gdeluisi-0.1.0.dist-info/RECORD,,
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 GeggeDL
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,106 @@
|
|
1
|
+
from dataclasses import dataclass,field
|
2
|
+
from datetime import datetime
|
3
|
+
from time import strptime
|
4
|
+
from typing import Literal,get_args,Iterable,Optional,Union,Generator,Callable
|
5
|
+
import json
|
6
|
+
from pathlib import Path
|
7
|
+
from repository_miner.utility import Call
|
8
|
+
|
9
|
+
@dataclass
class Author():
    """A commit author identified by name and e-mail address.

    Equality and hashing look only at ``name`` and ``email``; the list of
    authored commits does not take part in either.
    """
    email: str
    name: str
    # Identifiers of the commits attributed to this author.
    commits_authored: list[str] = field(default_factory=list)

    def __hash__(self):
        return hash(repr(self.name) + repr(self.email))

    def __eq__(self, value):
        # Returning NotImplemented (instead of raising) lets Python fall back
        # to its default handling, so ``author == None`` or ``author in items``
        # with mixed item types evaluates to False rather than blowing up.
        if not isinstance(value, Author):
            return NotImplemented
        return self.name == value.name and self.email == value.email

    def __str__(self):
        return f"Name: {self.name} , Email: {self.email}"

    def __repr__(self):
        return f"Name: {self.name} , Email: {self.email} , Commits: {self.commits_authored}"
|
24
|
+
|
25
|
+
@dataclass
class CommitInfo():
    """Record describing one commit.

    Instances hash on ``commit_hash`` only. ``get_tree`` is a hook that this
    base class leaves unimplemented.
    """
    commit_hash: str
    abbr_hash: str
    tree: str
    refs: str
    subject: str
    author_name: str
    author_email: str
    date: datetime

    def __hash__(self):
        full_hash = self.commit_hash
        return hash(full_hash)

    def get_tree(self) -> 'Tree':
        """Return the tree of this commit; not implemented on the base class."""
        raise NotImplementedError()
|
39
|
+
|
40
|
+
@dataclass
class Head():
    """A named reference together with the hash it points to.

    Instances hash on ``hash`` only. ``traverse_commits`` is a hook that this
    base class leaves unimplemented.
    """
    name: str
    hash: str

    def __hash__(self):
        # Inside the method the builtin ``hash`` is used; ``self.hash`` is the field.
        target = self.hash
        return hash(target)

    def traverse_commits(self) -> Generator[CommitInfo, None, None]:
        """Yield the commits of this head; not implemented on the base class."""
        raise NotImplementedError()
|
48
|
+
|
49
|
+
class HeadImpl(Head):
    """``Head`` whose commit traversal is delegated to a stored callable."""

    def __init__(self, name: str, hash: str, retrieve_func: Call):
        super().__init__(name=name, hash=hash)
        # Zero-argument callable invoked by ``traverse_commits``.
        self.retrieve_func = retrieve_func

    def traverse_commits(self):
        """Invoke the stored callable and return whatever it produces."""
        retrieve = self.retrieve_func
        return retrieve()
|
55
|
+
|
56
|
+
|
57
|
+
@dataclass
class Blob():
    """Record describing a blob object: its hash, name, path and size.

    Instances hash on ``hash`` only. ``get_source`` is a hook that this base
    class leaves unimplemented.
    """
    hash: str
    name: str
    path: str
    size: int

    def __hash__(self):
        # Inside the method the builtin ``hash`` is used; ``self.hash`` is the field.
        object_id = self.hash
        return hash(object_id)

    def get_source(self) -> list[str]:
        """Return the blob content as lines; not implemented on the base class."""
        raise NotImplementedError()
|
67
|
+
|
68
|
+
@dataclass
class Tree():
    """Record describing a tree object: its hash and its path.

    Instances hash on ``hash`` only. ``traverse`` is a hook that this base
    class leaves unimplemented.
    """
    hash: str
    path: str

    def traverse(self) -> Generator[Union['Tree', Blob], None, None]:
        """Yield the entries of this tree; not implemented on the base class."""
        raise NotImplementedError()

    def __hash__(self):
        # Inside the method the builtin ``hash`` is used; ``self.hash`` is the field.
        object_id = self.hash
        return hash(object_id)
|
76
|
+
|
77
|
+
class TreeImpl(Tree):
    """``Tree`` whose traversal is delegated to a stored callable."""

    def __init__(self, hash: str, path: str, iter_function: Call):
        super().__init__(hash=hash, path=path)
        # Zero-argument callable invoked by ``traverse``.
        self.iter_func = iter_function

    def traverse(self) -> Generator[Union[Tree, Blob], None, None]:
        """Invoke the stored callable and return whatever it produces."""
        iterate = self.iter_func
        return iterate()
|
83
|
+
|
84
|
+
class CommitInfoImpl(CommitInfo):
    """``CommitInfo`` whose tree lookup is delegated to a stored callable."""

    def __init__(
        self,
        commit_hash: str,
        abbr_hash: str,
        tree: str,
        refs: str,
        subject: str,
        author_name: str,
        author_email: str,
        date: datetime,
        tree_func: Call,
    ):
        super().__init__(
            commit_hash=commit_hash,
            abbr_hash=abbr_hash,
            tree=tree,
            refs=refs,
            subject=subject,
            author_name=author_name,
            author_email=author_email,
            date=date,
        )
        # Zero-argument callable invoked by ``get_tree``.
        self.tree_func = tree_func

    def get_tree(self) -> Tree:
        """Invoke the stored callable and return whatever it produces."""
        resolve_tree = self.tree_func
        return resolve_tree()
|
100
|
+
|
101
|
+
class BlobImpl(Blob):
    """``Blob`` whose content lookup is delegated to a stored callable."""

    def __init__(self, hash: str, name: str, path: str, size: int, source_func: Call):
        super().__init__(hash=hash, name=name, path=path, size=size)
        # Zero-argument callable invoked by ``get_source``.
        self.source_func = source_func

    def get_source(self):
        """Invoke the stored callable and return whatever it produces."""
        read_source = self.source_func
        return read_source()
|
@@ -0,0 +1,15 @@
|
|
1
|
+
class GitNotFoundException(Exception):
    """Raised when the git executable cannot be found."""
|
4
|
+
|
5
|
+
class GitCmdError(Exception):
    """Raised when a git command fails."""
|
8
|
+
|
9
|
+
class NotGitRepositoryError(Exception):
    """Raised when a directory is not a git repository."""
|
12
|
+
|
13
|
+
class ParsingException(Exception):
    """Raised when command output cannot be parsed."""
|
@@ -0,0 +1,31 @@
|
|
1
|
+
from .utility import execute_command
|
2
|
+
from .helper import cmd_builder,log_builder,rev_list_builder,get_head_commit,is_dir_a_repo,is_git_available
|
3
|
+
from .exceptions import *
|
4
|
+
from functools import partial
|
5
|
+
from typing import Iterable,Optional
|
6
|
+
from datetime import datetime
|
7
|
+
from subprocess import CalledProcessError
|
8
|
+
class Git():
    """Runs git sub-commands against one repository.

    Any public attribute that is not otherwise defined is treated as a git
    sub-command: underscores become hyphens, so ``git.rev_list(...)`` builds
    and runs the ``rev-list`` command for ``self.path``.
    """

    def __init__(self, path: str):
        """Bind the wrapper to ``path``.

        Raises:
            GitNotFoundException: If ``is_git_available()`` reports no git.
            NotGitRepositoryError: If ``is_dir_a_repo(path)`` is false.
        """
        if not is_git_available():
            raise GitNotFoundException("Git not found")
        if not is_dir_a_repo(path):
            raise NotGitRepositoryError(f"Directory {path} is not a git repository")
        self.path = path

    def _execute_command(self, command: str, *args) -> str:
        """Build and run ``command`` with ``args`` and return its output.

        Args:
            command (str): git sub-command name.
            *args: Command arguments. A single non-string iterable is
                unpacked, so ``f(["-t", x])`` and ``f("-t", x)`` are equivalent.

        Raises:
            GitCmdError: If the command exits with a non-zero status.
        """
        if len(args) == 1 and not isinstance(args[0], str) and isinstance(args[0], Iterable):
            args = tuple(args[0])
        cmd = cmd_builder(command, self.path, *args)
        try:
            return execute_command(cmd)
        except CalledProcessError as e:
            # Chain the cause so the original process error stays inspectable.
            raise GitCmdError(f"Command {cmd} raised an error {e.stderr}") from e

    def __getattr__(self, name: str):
        """Map an unknown public attribute to a git sub-command runner.

        ``__getattr__`` is only consulted after normal lookup has failed, so
        existing attributes never reach this point.
        """
        # Private and dunder names are never git commands. Refusing them keeps
        # protocol probes such as ``getattr(obj, "__deepcopy__", None)`` from
        # receiving a callable that would run a bogus git command.
        if name.startswith("_"):
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")
        return partial(self._execute_command, name.replace("_", "-"))
|
@@ -0,0 +1,156 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from shutil import which
|
3
|
+
import sys
|
4
|
+
import os
|
5
|
+
import subprocess
|
6
|
+
from typing import Optional
|
7
|
+
from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
|
8
|
+
from math import floor,ceil
|
9
|
+
from functools import partial
|
10
|
+
from typing import Iterable
|
11
|
+
import json
|
12
|
+
from datetime import datetime
|
13
|
+
# max_worker = min(32,os.cpu_count())
|
14
|
+
|
15
|
+
def date_builder(since:Optional[datetime]=None,to:Optional[datetime]=None)->list[str]:
    """Build the ``--since`` / ``--until`` arguments for a date window.

    Args:
        since (Optional[datetime]): Lower bound; skipped when falsy.
        to (Optional[datetime]): Upper bound; skipped when falsy.

    Raises:
        ValueError: If both bounds are given and ``to`` precedes ``since``.

    Returns:
        list[str]: Zero, one or two arguments, ``--since`` first.
    """
    if since and to and to < since:
        raise ValueError("'to' cannot come before 'since'")
    day_format = r"%Y-%m-%d"
    bounds = (("--since", since), ("--until", to))
    return [
        f"{flag}='{moment.strftime(day_format)}'"
        for flag, moment in bounds
        if moment
    ]
|
26
|
+
|
27
|
+
def cmd_builder(command:str,repo:str,*args)->str:
    """Assemble a ``git -C <repo> <command> <args...>`` command line.

    Args:
        command (str): git sub-command to run.
        repo (str): Directory passed to ``git -C``.
        *args: Extra arguments, joined with single spaces.

    Returns:
        str: The complete command as one string. A space always follows the
        sub-command, so the string ends with a space when no ``args`` are given.
    """
    # NOTE(review): ``repo`` is inserted unquoted; a path containing spaces
    # would presumably be split if this string is run through a shell.
    prefix = f"git -C {repo} {command}"
    joined_args = " ".join(args)
    return prefix + " " + joined_args
|
41
|
+
|
42
|
+
def range_builder(from_commmit:str,to_commit:Optional[str]=None)->str:
    """Build the revision argument for a commit range.

    Args:
        from_commmit (str): Revision the range ends at; required.
        to_commit (Optional[str]): Revision placed on the left of ``..``.

    Raises:
        ValueError: If ``from_commmit`` is empty or None.

    Returns:
        str: ``"<to_commit>..<from_commmit>"`` when ``to_commit`` is given,
        otherwise ``from_commmit`` unchanged.
    """
    if not from_commmit:
        raise ValueError("'from_commit' parameter must always be valorized")
    return f"{to_commit}..{from_commmit}" if to_commit else from_commmit
|
49
|
+
|
50
|
+
def log_builder(
    from_commit:str,
    to_commit:Optional[str]=None,
    pretty:Optional[str]=None,
    merges:bool=False,
    max_count:Optional[int]=None,
    skip:Optional[int]=None,
    author:Optional[str]=None,
    follow:Optional[str]=None,
    since:Optional[datetime]=None,
    to:Optional[datetime]=None,
    args=(),
)->str:
    """Build the argument string for a ``git log`` command.

    Args:
        from_commit (str): Revision the log starts from.
        to_commit (Optional[str], optional): Other end of the range, see ``range_builder``. Defaults to None.
        pretty (Optional[str], optional): Format passed as ``--pretty="format:..."``. Defaults to None.
        merges (bool, optional): When True, ``--no-merges`` is appended, so merge
            commits are left out. Defaults to False.
            NOTE(review): the name reads like the opposite; confirm the intended
            polarity with every caller before changing it.
        max_count (Optional[int], optional): Value for ``--max-count``. Defaults to None.
        skip (Optional[int], optional): Value for ``--skip``. Defaults to None.
        author (Optional[str], optional): Value for ``--author``; ignored when empty. Defaults to None.
        follow (Optional[str], optional): File passed as ``--follow -- "<file>"``. Defaults to None.
        since (Optional[datetime], optional): Lower date bound, see ``date_builder``. Defaults to None.
        to (Optional[datetime], optional): Upper date bound, see ``date_builder``. Defaults to None.
        args (optional): Extra raw arguments appended before ``--follow``.
            ``None`` is treated as "no extra arguments". Defaults to ().

    Raises:
        ValueError: If ``max_count`` is not positive or ``skip`` is negative.

    Returns:
        str: The space-joined arguments (without the leading ``git log``).
    """
    arg_list=[range_builder(from_commit,to_commit)]
    if max_count is not None:
        if max_count<=0:
            raise ValueError("max_count cannot be negative or 0")
        arg_list.append(f"--max-count={max_count}")
    if skip is not None:
        if skip<0:
            raise ValueError("skip cannot be negative")
        arg_list.append(f"--skip={skip}")
    if merges:
        arg_list.append("--no-merges")
    if pretty is not None:
        arg_list.append(f'--pretty="format:{pretty}"')
    if author:
        arg_list.append(f'--author="{author}"')
    arg_list.extend(date_builder(since,to))
    # The default is an immutable tuple, so no list is shared between calls;
    # a None value is tolerated instead of failing inside extend().
    if args:
        arg_list.extend(args)
    # The pathspec has to come last: everything after "--" is read as a path.
    if follow:
        arg_list.append(f'--follow -- "{follow}"')
    return " ".join(arg_list)
|
86
|
+
|
87
|
+
def rev_list_builder(
    from_commit:str,
    to_commit:Optional[str]=None,
    pretty:Optional[str]=None,
    merges:bool=False,
    max_count:Optional[int]=None,
    skip:Optional[int]=None,
    author:Optional[str]=None,
    since:Optional[datetime]=None,
    to:Optional[datetime]=None,
    args=(),
)->str:
    """Build the argument string for a ``git rev-list`` command.

    Args:
        from_commit (str): Revision the listing starts from.
        to_commit (Optional[str], optional): Other end of the range, see ``range_builder``. Defaults to None.
        pretty (Optional[str], optional): Format passed as ``--pretty="format:..."``. Defaults to None.
        merges (bool, optional): When True, ``--no-merges`` is appended, so merge
            commits are left out. Defaults to False.
            NOTE(review): the name reads like the opposite; confirm the intended
            polarity with every caller before changing it.
        max_count (Optional[int], optional): Value for ``--max-count``. Defaults to None.
        skip (Optional[int], optional): Value for ``--skip``. Defaults to None.
        author (Optional[str], optional): Value for ``--author``; added whenever it is not None. Defaults to None.
        since (Optional[datetime], optional): Lower date bound, see ``date_builder``. Defaults to None.
        to (Optional[datetime], optional): Upper date bound, see ``date_builder``. Defaults to None.
        args (optional): Extra raw arguments appended last. ``None`` is treated
            as "no extra arguments". Defaults to ().

    Raises:
        ValueError: If ``max_count`` is not positive or ``skip`` is negative.

    Returns:
        str: The space-joined arguments (without the leading ``git rev-list``).
    """
    arg_list=[range_builder(from_commit,to_commit)]
    if max_count is not None:
        if max_count<=0:
            raise ValueError("max_count cannot be negative or 0")
        arg_list.append(f"--max-count={max_count}")
    if skip is not None:
        if skip<0:
            raise ValueError("skip cannot be negative")
        arg_list.append(f"--skip={skip}")
    if merges:
        arg_list.append("--no-merges")
    if pretty is not None:
        arg_list.append(f'--pretty="format:{pretty}"')
    if author is not None:
        arg_list.append(f'--author="{author}"')
    arg_list.extend(date_builder(since,to))
    # The default is an immutable tuple, so no list is shared between calls;
    # a None value is tolerated instead of failing inside extend().
    if args:
        arg_list.extend(args)
    return " ".join(arg_list)
|
120
|
+
|
121
|
+
def is_git_available()->bool:
    """Tell whether a ``git`` executable can be located with ``shutil.which``.

    Returns:
        bool: True when ``which("git")`` finds something, False otherwise.
    """
    located = which("git")
    return located is not None
|
128
|
+
|
129
|
+
def is_dir_a_repo(path:str)->bool:
    """Check whether ``git rev-parse HEAD`` succeeds in ``path``.

    Args:
        path (str): Path to the candidate repository directory.

    Returns:
        bool: True when the command exits with status 0, False when it fails
        or git cannot be started at all.
    """
    repo_dir = Path(path).resolve().as_posix()
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    cmd = [which("git") or "git", "-C", repo_dir, "rev-parse", "HEAD"]
    try:
        # Only the exit status matters here; keep the sha and git's error
        # text out of the caller's stdout/stderr.
        subprocess.check_call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git executable, which the previous
        # shell-based call reported as a failed command as well.
        return False
    return True
|
144
|
+
|
145
|
+
def get_head_commit(path:str)->str:
    """Return the commit sha that ``HEAD`` resolves to.

    Args:
        path (str): Path to the git directory.

    Raises:
        subprocess.CalledProcessError: If ``git rev-parse HEAD`` fails or git
            cannot be started.

    Returns:
        str: HEAD's commit sha without surrounding whitespace.
    """
    repo_dir = Path(path).resolve().as_posix()
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    cmd = [which("git") or "git", "-C", repo_dir, "rev-parse", "HEAD"]
    try:
        raw = subprocess.check_output(cmd)
    except FileNotFoundError as e:
        # Without a shell a missing executable surfaces as FileNotFoundError;
        # re-raise it as the error type the shell-based call used to produce
        # (exit status 127 is the shell's "command not found").
        raise subprocess.CalledProcessError(127, cmd) from e
    # strip() drops the line terminator whatever its form, instead of blindly
    # cutting the last character.
    return raw.decode().strip()
|
156
|
+
|
@@ -0,0 +1,98 @@
|
|
1
|
+
from .git import Git
|
2
|
+
from .utility import Call
|
3
|
+
from .helper import cmd_builder,log_builder,rev_list_builder,get_head_commit,is_dir_a_repo,is_git_available
|
4
|
+
from .exceptions import *
|
5
|
+
from .data_typing import *
|
6
|
+
from functools import partial
|
7
|
+
from typing import Iterable,Optional,Generator
|
8
|
+
from datetime import datetime
|
9
|
+
import re
|
10
|
+
class RepoMiner():
    """Read-only access to the commits, trees, blobs, branches and authors
    of the git repository located at ``path``."""

    def __init__(self,path:str):
        """Create the ``Git`` wrapper for ``path`` and remember the path."""
        self.git=Git(path)
        self.path=path

    def _commit_from_log(self,log:str)->CommitInfo:
        """Turn one ``///``-separated log line into a ``CommitInfoImpl``.

        The line must follow the ``%H///%T///%s///%an///%ae///%as///%D`` layout.

        Raises:
            ValueError: If the line does not split into exactly seven fields
                or the date is not in ``%Y-%m-%d`` form.
        """
        c_hash,tree,sub,a_name,a_email,c_date,ref=log.split(r"///")
        return CommitInfoImpl(c_hash,c_hash[:7],tree,ref,sub,a_name,a_email,datetime.strptime(c_date,r"%Y-%m-%d"),Call(self.tree,tree))

    def retrieve_commits(self,from_commit:Optional[str]=None,to_commit:Optional[str]=None,merges:bool=False,max_count:Optional[int]=None,skip:Optional[int]=None,author:Optional[str]=None,follow:Optional[str]=None,since:Optional[datetime]=None,to:Optional[datetime]=None,extra_args:Optional[Iterable[str]]=None)->Generator[CommitInfo,None,None]:
        """Yield the commits reported by ``git log`` for the given filters.

        Args:
            from_commit (Optional[str]): Start revision; HEAD when omitted.
            extra_args (Optional[Iterable[str]]): Raw extra arguments; None
                means none.

        All other parameters are forwarded unchanged to ``log_builder``.

        Raises:
            ParsingException: If a log line cannot be parsed.
        """
        if not from_commit:
            from_commit=get_head_commit(self.path)
        # A fresh list per call: no shared mutable default, and None is accepted.
        extras=list(extra_args) if extra_args is not None else []
        pretty=r"%H///%T///%s///%an///%ae///%as///%D"
        logs=self.git.log(log_builder(from_commit,to_commit,pretty,merges,max_count,skip,author,follow,since,to,extras))
        for log in logs.splitlines(False):
            try:
                commit=self._commit_from_log(log)
            except ValueError as e:
                raise ParsingException(f"Log {log} was not parsed") from e
            yield commit

    def n_commits(self,from_commit:Optional[str]=None,to_commit:Optional[str]=None,merges:bool=True,skip:Optional[int]=None,author:Optional[str]=None,since:Optional[datetime]=None,to:Optional[datetime]=None)->int:
        """Return the number printed by ``git rev-list --count`` for the filters.

        ``from_commit`` defaults to HEAD; the remaining parameters are
        forwarded unchanged to ``rev_list_builder``.
        """
        if not from_commit:
            from_commit=get_head_commit(self.path)
        return int(self.git.rev_list(rev_list_builder(from_commit=from_commit,to_commit=to_commit,merges=merges,max_count=None,skip=skip,author=author,since=since,to=to,args=["--count"])))

    def tree(self,treeish:str)->Tree:
        """Return a lazily traversed ``Tree`` for ``treeish``.

        Raises:
            ValueError: If the object cannot be inspected or is a blob.
        """
        try:
            obj_type=self.git.cat_file(["-t",treeish])
        except GitCmdError as e:
            raise ValueError(f"Cannot retrieve a tree from {treeish}") from e
        # The previous test (`== "blob" and == "tag"`) could never be true, so
        # blobs slipped through. Tags are still accepted, exactly as before.
        if obj_type == "blob":
            raise ValueError(f"Cannot retrieve a tree from {treeish}")
        return TreeImpl(treeish,"",Call(self.iterate_tree,treeish,True))

    def iterate_tree(self,treeish:str,recursive:bool=False)->Generator[Union[Tree,Blob],None,None]:
        """Yield the ``Tree`` and ``Blob`` entries that ``git ls-tree`` lists.

        Args:
            treeish (str): Object to list.
            recursive (bool): When True, ``-r -t`` is passed so nested entries
                and the intermediate trees are listed too.

        Raises:
            ValueError: If the ls-tree command fails.
            ParsingException: If an output line cannot be parsed.
        """
        p_format="--format=\"%(objectname)///%(objecttype)///%(objectsize)///%(path)\""
        args=[p_format]
        if recursive:
            args.append("-r")
            # -t only has an effect together with -r.
            args.append("-t")
        args.append(treeish)
        try:
            res=self.git.ls_tree(args)
        except GitCmdError as e:
            raise ValueError(f"Cannot retrieve a tree from {treeish}") from e
        for line in res.splitlines(False):
            try:
                # maxsplit keeps a path that itself contains "///" in one piece.
                h,t,size,path=line.split('///',3)
                if t == "blob":
                    size=int(size)
            except ValueError as e:
                raise ParsingException(f"Unable to parse tree line {line}") from e
            if t == "tree":
                # Traverse the sub-tree itself (its own hash), not the parent
                # that was just listed. NOTE(review): entries produced by that
                # traversal presumably carry paths relative to the sub-tree.
                yield TreeImpl(h,path,Call(self.iterate_tree,treeish=h,recursive=True))
            elif t == "blob":
                yield BlobImpl(h,path.rsplit("/",1)[-1],path,size,Call(self.get_source,h))

    def get_commit(self,commit_sha:str)->CommitInfo:
        """Return the ``CommitInfo`` that ``git log --max-count=1`` reports for ``commit_sha``.

        Raises:
            ValueError: If the log output cannot be parsed.
        """
        pretty=r"%H///%T///%s///%an///%ae///%as///%D"
        log=self.git.log(log_builder(commit_sha,None,pretty,max_count=1))
        return self._commit_from_log(log)

    def local_branches(self)->Generator[Head,None,None]:
        """Yield one ``Head`` per line printed by ``git branch -l``."""
        branches=self.git.branch("-l").splitlines()
        for branch in branches:
            # Drop the "*" marker and the surrounding padding.
            name=branch.strip("*").strip()
            yield HeadImpl(name,self.git.rev_parse(name),Call(self.retrieve_commits,from_commit=name,merges=True))

    def authors(self)->set[Author]:
        """Return the authors found by ``git shortlog -e --all`` with their commit hashes.

        Header lines that do not match the expected
        ``Name <email> (count)`` pattern are skipped.
        """
        pattern=re.compile(r'([A-Za-zÀ-ÖØ-öø-ÿé\s]+) <([a-z0-9A-ZÀ-ÖØ-öø-ÿé!#$%@.&*+\/=?^_{|}~-]+)> \(\d+\)')
        authors=set()
        res=self.git.shortlog(["-e","--all","--pretty='format:%H'"])
        # Blocks are separated by blank lines. Every block is inspected: the
        # command output arrives stripped, so unconditionally dropping the last
        # block lost the final author. Empty blocks fail the pattern below.
        for a_block in res.split("\n\n"):
            header,*commit_lines=a_block.split("\n")
            match=pattern.match(header.strip())
            if not match:
                continue
            name,email=match.groups()
            # Keywords guard against the field order of Author (email, name).
            authors.add(Author(email=email,name=name,commits_authored=[line.strip() for line in commit_lines]))
        return authors

    def get_source(self, id:str)->list[str]:
        """Return the content of blob ``id`` split into lines.

        Raises:
            TypeError: If ``id`` does not refer to a blob.
            FileNotFoundError: If the object type cannot be retrieved.
        """
        try:
            if self.git.cat_file("-t",id) != "blob":
                raise TypeError(f"Hexsha {id} is not a blob")
        except GitCmdError as e:
            raise FileNotFoundError("Couldn't retrieve the object") from e
        return re.split(string=self.git.cat_file("-p",id),pattern=r"\r\n|\r|\n")
|
File without changes
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import subprocess
|
2
|
+
from math import floor,ceil
|
3
|
+
from typing import Iterable,Callable,Any
|
4
|
+
|
5
|
+
class Call():
    """Deferred call: stores a callable with its arguments and runs it on demand."""

    def __init__(self,func:Callable[...,Any],*args,**kwargs):
        self.func, self.args, self.kwargs = func, args, kwargs

    def __call__(self, *args, **kwds):
        """Run the stored callable with the stored arguments.

        Arguments given at call time are accepted but not forwarded.
        """
        stored_args, stored_kwargs = self.args, self.kwargs
        return self.func(*stored_args, **stored_kwargs)
|
12
|
+
|
13
|
+
def execute_command(command:str)->str:
    """Run ``command`` through the shell and return its stripped standard output.

    Args:
        command (str): Complete command line.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status; its ``stderr`` attribute holds the captured error text.

    Returns:
        str: Standard output decoded as UTF-8, without surrounding whitespace.
    """
    # stderr is captured so that CalledProcessError.stderr carries the error
    # text instead of None.
    output = subprocess.check_output(
        command,
        shell=True,
        text=True,
        encoding="utf-8",
        stderr=subprocess.PIPE,
    )
    return output.strip()
|
15
|
+
|
16
|
+
def create_batches(it:Iterable,n:int)->Iterable[Iterable]:
    """Split the items of an iterable into consecutive batches of ``n`` items.

    Args:
        it (Iterable): Iterable from which batches are created.
        n (int): Number of items per batch; the last batch may be shorter.

    Raises:
        ValueError: If ``n`` is missing or smaller than 1, or if the iterable
            is None or empty.

    Returns:
        Iterable[Iterable]: Tuple of lists, one list per batch, in input order.
    """
    # A negative step would make range() below empty and silently return no
    # batches, so reject it together with 0 and None.
    if not n or n < 1:
        raise ValueError("n must be at least 1")
    if not it:
        raise ValueError("Iterable cannot be None or empty")
    # Materialise once: one-shot iterators are always truthy, so emptiness can
    # only be checked after they have been consumed.
    items=list(it)
    if not items:
        raise ValueError("Iterable must not be empty")
    return tuple(items[start:start+n] for start in range(0,len(items),n))
|