repo_miner 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -1
- data/lib/repo_miner/commit.rb +32 -4
- data/lib/repo_miner/miners/dependencies.rb +182 -0
- data/lib/repo_miner/miners/email.rb +2 -5
- data/lib/repo_miner/repository.rb +3 -3
- data/lib/repo_miner/version.rb +1 -1
- data/repo_miner.gemspec +2 -1
- metadata +18 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8e005fe499b1896489c3f607ac01c8865dd5bba
|
4
|
+
data.tar.gz: daa6c769d3dd9112c1e4266e2d3f32583292d0ea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b9fb75bda604333a55de49e7bd1e92d8bc728b8ab0b73139a7ea01bf76e289444d0ed223948c2f78890d93f9fc9a06905664518d550e834609f84f56fc5e23f4
|
7
|
+
data.tar.gz: b85aabbad7cbb291cee1b4d56c57b68ed69f874ff7b5bdd7fd2daef6375b3201bd84cc99905796281aae394c381f1cbf6cd33d3145749eb0a92a29f132f97ff9
|
data/README.md
CHANGED
@@ -29,7 +29,23 @@ Or install it yourself as:
|
|
29
29
|
|
30
30
|
## Usage
|
31
31
|
|
32
|
-
|
32
|
+
Make a new repository:
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
repository = RepoMiner::Repository.new('/path/to/git/repo')
|
36
|
+
```
|
37
|
+
|
38
|
+
Analyse all commits for a given branch:
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
commits = repository.analyse('master')
|
42
|
+
```
|
43
|
+
|
44
|
+
See mined dependency data for a given commit:
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
commits.last #=> RepoMiner::Commit:0x007fd87fdf1150(message: "Fixes 1597", sha: c656e48ada19c6c83f7705893f0a73cfc1844abf, data: {:email=>{:committer=>"andrewnez@gmail.com", :author=>"andrewnez@gmail.com"}, :dependencies=>{:added_manifests=>[], :modified_manifests=>[{:path=>"Gemfile", :platform=>"rubygems", :added_dependencies=>[], :modified_dependencies=>[], :removed_dependencies=>[{:name=>"sass", :requirement=>"= 3.4.24", :type=>:runtime}]}, {:path=>"Gemfile.lock", :platform=>"rubygems", :added_dependencies=>[{:name=>"sass-listen", :requirement=>"4.0.0", :type=>"runtime"}], :modified_dependencies=>[{:name=>"commonmarker", :requirement=>"0.16.8", :type=>"runtime", :previous_requirement=>"0.16.7"}, {:name=>"gitlab", :requirement=>"4.2.0", :type=>"runtime", :previous_requirement=>"4.1.0"}, {:name=>"rack-cors", :requirement=>"1.0.0", :type=>"runtime", :previous_requirement=>"0.4.1"}, {:name=>"sass", :requirement=>"3.5.1", :type=>"runtime", :previous_requirement=>"3.4.24"}, {:name=>"sassc", :requirement=>"1.11.4", :type=>"runtime", :previous_requirement=>"1.11.2"}], :removed_dependencies=>[]}], :removed_manifests=>[]}})
|
48
|
+
```
|
33
49
|
|
34
50
|
## Development
|
35
51
|
|
data/lib/repo_miner/commit.rb
CHANGED
@@ -1,18 +1,26 @@
|
|
1
1
|
module RepoMiner
|
2
2
|
class Commit
|
3
3
|
attr_reader :repository
|
4
|
-
attr_reader :
|
4
|
+
attr_reader :rugged_commit
|
5
5
|
attr_accessor :data
|
6
6
|
|
7
|
-
def initialize(repository,
|
7
|
+
def initialize(repository, rugged_commit)
|
8
8
|
@repository = repository
|
9
|
-
@
|
9
|
+
@rugged_commit = rugged_commit
|
10
10
|
@data = {}
|
11
11
|
end
|
12
12
|
|
13
|
+
def message
|
14
|
+
rugged_commit.message.strip
|
15
|
+
end
|
16
|
+
|
17
|
+
def sha
|
18
|
+
rugged_commit.oid
|
19
|
+
end
|
20
|
+
|
13
21
|
def analyse
|
14
|
-
# for every miner (except Base) analyse commit
|
15
22
|
Miners::Email.new.analyse(self)
|
23
|
+
Miners::Dependencies.new.analyse(self)
|
16
24
|
|
17
25
|
self
|
18
26
|
end
|
@@ -20,5 +28,25 @@ module RepoMiner
|
|
20
28
|
def add_data(key, miner_data)
|
21
29
|
data[key] = miner_data
|
22
30
|
end
|
31
|
+
|
32
|
+
def content_before(file_path)
|
33
|
+
content_for_commit(rugged_commit.parents[0], file_path)
|
34
|
+
end
|
35
|
+
|
36
|
+
def content_after(file_path)
|
37
|
+
content_for_commit(rugged_commit, file_path)
|
38
|
+
end
|
39
|
+
|
40
|
+
def inspect
|
41
|
+
"RepoMiner::Commit:#{"0x00%x" % (object_id << 1)}(message: #{message}, sha: #{sha}, data: #{data})"
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
def content_for_commit(rugged_commit, file_path)
|
47
|
+
path = rugged_commit.tree.path(file_path)
|
48
|
+
blob = repository.rugged_repository.lookup(path[:oid])
|
49
|
+
blob.content
|
50
|
+
end
|
23
51
|
end
|
24
52
|
end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
require 'bibliothecary'
|
2
|
+
|
3
|
+
module RepoMiner
|
4
|
+
module Miners
|
5
|
+
class Dependencies
|
6
|
+
def analyse(commit)
|
7
|
+
all_paths = blob_paths(commit.rugged_commit)
|
8
|
+
|
9
|
+
added_paths = all_paths.select{|path| path[:status] == :added }.map{|path| path[:path] }
|
10
|
+
modified_paths = all_paths.select{|path| path[:status] == :modified }.map{|path| path[:path] }
|
11
|
+
removed_paths = all_paths.select{|path| path[:status] == :deleted }.map{|path| path[:path] }
|
12
|
+
|
13
|
+
added_manifest_paths = Bibliothecary.identify_manifests(added_paths)
|
14
|
+
modified_manifest_paths = Bibliothecary.identify_manifests(modified_paths)
|
15
|
+
removed_manifest_paths = Bibliothecary.identify_manifests(removed_paths)
|
16
|
+
|
17
|
+
# don't bother analysing commits where no dependency files were touched
|
18
|
+
return nil if added_manifest_paths.empty? && modified_manifest_paths.empty? && removed_manifest_paths.empty?
|
19
|
+
|
20
|
+
# Added manifest files
|
21
|
+
added_manifests = []
|
22
|
+
added_manifest_paths.each do |manifest_path|
|
23
|
+
manifest = Bibliothecary.analyse_file(manifest_path, commit.content_after(manifest_path))
|
24
|
+
|
25
|
+
new_manifest = manifest.first
|
26
|
+
|
27
|
+
if new_manifest
|
28
|
+
dependencies = new_manifest[:dependencies]
|
29
|
+
added = dependencies.map{|d| d[:name] }
|
30
|
+
|
31
|
+
added.map! do |dep_name|
|
32
|
+
dep = dependencies.find{|d| d[:name] == dep_name }
|
33
|
+
{
|
34
|
+
name: dep_name,
|
35
|
+
requirement: dep[:requirement],
|
36
|
+
type: dep[:type]
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
added_manifests << {
|
41
|
+
path: manifest_path,
|
42
|
+
platform: manifest[0][:platform],
|
43
|
+
added_dependencies: added,
|
44
|
+
modified_dependencies: [],
|
45
|
+
removed_dependencies: []
|
46
|
+
}
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
# Modified manifest files
|
51
|
+
modified_manifests = []
|
52
|
+
modified_manifest_paths.each do |manifest_path|
|
53
|
+
before_manifest = Bibliothecary.analyse_file(manifest_path, commit.content_before(manifest_path))
|
54
|
+
before_modified_manifest = before_manifest.first
|
55
|
+
|
56
|
+
after_manifest = Bibliothecary.analyse_file(manifest_path, commit.content_after(manifest_path))
|
57
|
+
after_modified_manifest = after_manifest.first
|
58
|
+
|
59
|
+
if before_modified_manifest && after_modified_manifest
|
60
|
+
before_dependencies = before_modified_manifest[:dependencies]
|
61
|
+
after_dependencies = after_modified_manifest[:dependencies]
|
62
|
+
|
63
|
+
added_dependency_names = after_dependencies.map{|d| d[:name] } - before_dependencies.map{|d| d[:name] }
|
64
|
+
removed_dependency_names = before_dependencies.map{|d| d[:name] } - after_dependencies.map{|d| d[:name] }
|
65
|
+
|
66
|
+
potentially_modified_dependency_names = after_dependencies.map{|d| d[:name] } - added_dependency_names - removed_dependency_names
|
67
|
+
modified_dependency_names = potentially_modified_dependency_names.select do |name|
|
68
|
+
after = after_dependencies.find{|d| d[:name] == name }
|
69
|
+
before = before_dependencies.find{|d| d[:name] == name }
|
70
|
+
(after[:requirement] != before[:requirement]) || (after[:type] != before[:type])
|
71
|
+
end
|
72
|
+
|
73
|
+
# added_dependencies
|
74
|
+
added_dependencies = added_dependency_names.map do |dep_name|
|
75
|
+
dep = after_dependencies.find{|d| d[:name] == dep_name }
|
76
|
+
{
|
77
|
+
name: dep_name,
|
78
|
+
requirement: dep[:requirement],
|
79
|
+
type: dep[:type]
|
80
|
+
}
|
81
|
+
end
|
82
|
+
|
83
|
+
# modified_dependencies
|
84
|
+
modified_dependencies = modified_dependency_names.map do |dep_name|
|
85
|
+
after = after_dependencies.find{|d| d[:name] == dep_name }
|
86
|
+
before = before_dependencies.find{|d| d[:name] == dep_name }
|
87
|
+
dep_hash = {
|
88
|
+
name: dep_name,
|
89
|
+
requirement: after[:requirement],
|
90
|
+
type: after[:type]
|
91
|
+
}
|
92
|
+
dep_hash[:previous_requirement] = before[:requirement] if after[:requirement] != before[:requirement]
|
93
|
+
dep_hash[:previous_type] = before[:type] if after[:type] != before[:type]
|
94
|
+
dep_hash
|
95
|
+
end
|
96
|
+
|
97
|
+
# removed_dependencies
|
98
|
+
removed_dependencies = removed_dependency_names.map do |dep_name|
|
99
|
+
dep = before_dependencies.find{|d| d[:name] == dep_name }
|
100
|
+
{
|
101
|
+
name: dep_name,
|
102
|
+
requirement: dep[:requirement],
|
103
|
+
type: dep[:type]
|
104
|
+
}
|
105
|
+
end
|
106
|
+
|
107
|
+
modified_manifests << {
|
108
|
+
path: manifest_path,
|
109
|
+
platform: after_modified_manifest[:platform],
|
110
|
+
added_dependencies: added_dependencies,
|
111
|
+
modified_dependencies: modified_dependencies,
|
112
|
+
removed_dependencies: removed_dependencies
|
113
|
+
}
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
# Removed manifest files
|
118
|
+
removed_manifests = []
|
119
|
+
removed_manifest_paths.each do |manifest_path|
|
120
|
+
manifest = Bibliothecary.analyse_file(manifest_path, commit.content_before(manifest_path))
|
121
|
+
|
122
|
+
removed_manifest = manifest.first
|
123
|
+
|
124
|
+
if removed_manifest
|
125
|
+
dependencies = removed_manifest[:dependencies]
|
126
|
+
removed = dependencies.map{|d| d[:name] }
|
127
|
+
|
128
|
+
removed.map! do |dep_name|
|
129
|
+
dep = dependencies.find{|d| d[:name] == dep_name }
|
130
|
+
{
|
131
|
+
name: dep_name,
|
132
|
+
requirement: dep[:requirement],
|
133
|
+
type: dep[:type]
|
134
|
+
}
|
135
|
+
end
|
136
|
+
|
137
|
+
removed_manifests << {
|
138
|
+
path: manifest_path,
|
139
|
+
platform: manifest[0][:platform],
|
140
|
+
added_dependencies: [],
|
141
|
+
modified_dependencies: [],
|
142
|
+
removed_dependencies: removed
|
143
|
+
}
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
data = {
|
148
|
+
added_manifests: added_manifests,
|
149
|
+
modified_manifests: modified_manifests,
|
150
|
+
removed_manifests: removed_manifests
|
151
|
+
}
|
152
|
+
|
153
|
+
commit.add_data(:dependencies, data)
|
154
|
+
end
|
155
|
+
|
156
|
+
private
|
157
|
+
|
158
|
+
def blob_paths(commit)
|
159
|
+
paths = []
|
160
|
+
|
161
|
+
if commit.parents.count == 0 # initial commit
|
162
|
+
commit.tree.walk_blobs(:postorder) do |root, entry|
|
163
|
+
paths << {
|
164
|
+
status: :added,
|
165
|
+
path: "#{root}#{entry[:name]}"
|
166
|
+
}
|
167
|
+
end
|
168
|
+
else
|
169
|
+
diffs = commit.parents[0].diff(commit)
|
170
|
+
|
171
|
+
diffs.each_delta do |delta|
|
172
|
+
paths << {
|
173
|
+
status: delta.status,
|
174
|
+
path: delta.new_file[:path]
|
175
|
+
}
|
176
|
+
end
|
177
|
+
end
|
178
|
+
paths
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
@@ -3,17 +3,14 @@ module RepoMiner
|
|
3
3
|
class Email
|
4
4
|
def analyse(commit)
|
5
5
|
# analyse commit
|
6
|
-
committer_email = commit.
|
7
|
-
author_email = commit.
|
6
|
+
committer_email = commit.rugged_commit.committer[:email]
|
7
|
+
author_email = commit.rugged_commit.author[:email]
|
8
8
|
|
9
9
|
# attach mined info to commit
|
10
10
|
commit.add_data(:email, {
|
11
11
|
committer: committer_email,
|
12
12
|
author: author_email
|
13
13
|
})
|
14
|
-
|
15
|
-
# return the commit
|
16
|
-
commit
|
17
14
|
end
|
18
15
|
end
|
19
16
|
end
|
@@ -7,14 +7,14 @@ module RepoMiner
|
|
7
7
|
@repo_path = repo_path
|
8
8
|
end
|
9
9
|
|
10
|
-
def
|
10
|
+
def rugged_repository
|
11
11
|
@repository ||= Rugged::Repository.new(repo_path)
|
12
12
|
end
|
13
13
|
|
14
14
|
def walk(branch)
|
15
|
-
@walker = Rugged::Walker.new(
|
15
|
+
@walker = Rugged::Walker.new(rugged_repository)
|
16
16
|
@walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE)
|
17
|
-
@walker.push(
|
17
|
+
@walker.push(rugged_repository.branches[branch].target_id)
|
18
18
|
@walker
|
19
19
|
end
|
20
20
|
|
data/lib/repo_miner/version.rb
CHANGED
data/repo_miner.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
|
|
11
11
|
|
12
12
|
spec.summary = "Mine repositories for interesting changes over time"
|
13
13
|
spec.homepage = "https://github.com/librariesio/repo_miner"
|
14
|
-
spec.license = "
|
14
|
+
spec.license = "AGPL-3.0"
|
15
15
|
|
16
16
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
17
17
|
f.match(%r{^(test|spec|features)/})
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.require_paths = ["lib"]
|
22
22
|
|
23
23
|
spec.add_dependency "rugged"
|
24
|
+
spec.add_dependency "bibliothecary"
|
24
25
|
|
25
26
|
spec.add_development_dependency "bundler", "~> 1.14"
|
26
27
|
spec.add_development_dependency "rake", "~> 12.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: repo_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Nesbitt
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-07-
|
11
|
+
date: 2017-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rugged
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bibliothecary
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,13 +100,14 @@ files:
|
|
86
100
|
- bin/setup
|
87
101
|
- lib/repo_miner.rb
|
88
102
|
- lib/repo_miner/commit.rb
|
103
|
+
- lib/repo_miner/miners/dependencies.rb
|
89
104
|
- lib/repo_miner/miners/email.rb
|
90
105
|
- lib/repo_miner/repository.rb
|
91
106
|
- lib/repo_miner/version.rb
|
92
107
|
- repo_miner.gemspec
|
93
108
|
homepage: https://github.com/librariesio/repo_miner
|
94
109
|
licenses:
|
95
|
-
-
|
110
|
+
- AGPL-3.0
|
96
111
|
metadata: {}
|
97
112
|
post_install_message:
|
98
113
|
rdoc_options: []
|