repo_miner 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5bc0a4630da25d7d089012de9d1e954f4c8b3521
4
- data.tar.gz: 87d1fa6593047e0a52e0dd460d8c32caafc51aee
3
+ metadata.gz: f8e005fe499b1896489c3f607ac01c8865dd5bba
4
+ data.tar.gz: daa6c769d3dd9112c1e4266e2d3f32583292d0ea
5
5
  SHA512:
6
- metadata.gz: dd822f039d5b4e025e03e61e3fc08158b4ec8d93ee3e088a3c6c2a726d430d989661262966d65f3169ee62c24596cb70d7611ab91c85fa14f454cc5931514b57
7
- data.tar.gz: 945d1f2af6d73d6db682206062ef049a599be14876ba74515767967066b38afa6d5c449591194c768f7d49ad3463cadb57ced6c41de526a3e889bd37b38b9f2c
6
+ metadata.gz: b9fb75bda604333a55de49e7bd1e92d8bc728b8ab0b73139a7ea01bf76e289444d0ed223948c2f78890d93f9fc9a06905664518d550e834609f84f56fc5e23f4
7
+ data.tar.gz: b85aabbad7cbb291cee1b4d56c57b68ed69f874ff7b5bdd7fd2daef6375b3201bd84cc99905796281aae394c381f1cbf6cd33d3145749eb0a92a29f132f97ff9
data/README.md CHANGED
@@ -29,7 +29,23 @@ Or install it yourself as:
29
29
 
30
30
  ## Usage
31
31
 
32
- TODO: Write usage instructions here
32
+ Make a new repository:
33
+
34
+ ```ruby
35
+ repository = RepoMiner::Repository.new('/path/to/git/repo')
36
+ ```
37
+
38
+ Analyse all commits for a given branch:
39
+
40
+ ```ruby
41
+ commits = repository.analyse('master')
42
+ ```
43
+
44
+ See mined dependency data for a given commit:
45
+
46
+ ```ruby
47
+ commits.last #=> RepoMiner::Commit:0x007fd87fdf1150(message: "Fixes 1597", sha: c656e48ada19c6c83f7705893f0a73cfc1844abf, data: {:email=>{:committer=>"andrewnez@gmail.com", :author=>"andrewnez@gmail.com"}, :dependencies=>{:added_manifests=>[], :modified_manifests=>[{:path=>"Gemfile", :platform=>"rubygems", :added_dependencies=>[], :modified_dependencies=>[], :removed_dependencies=>[{:name=>"sass", :requirement=>"= 3.4.24", :type=>:runtime}]}, {:path=>"Gemfile.lock", :platform=>"rubygems", :added_dependencies=>[{:name=>"sass-listen", :requirement=>"4.0.0", :type=>"runtime"}], :modified_dependencies=>[{:name=>"commonmarker", :requirement=>"0.16.8", :type=>"runtime", :previous_requirement=>"0.16.7"}, {:name=>"gitlab", :requirement=>"4.2.0", :type=>"runtime", :previous_requirement=>"4.1.0"}, {:name=>"rack-cors", :requirement=>"1.0.0", :type=>"runtime", :previous_requirement=>"0.4.1"}, {:name=>"sass", :requirement=>"3.5.1", :type=>"runtime", :previous_requirement=>"3.4.24"}, {:name=>"sassc", :requirement=>"1.11.4", :type=>"runtime", :previous_requirement=>"1.11.2"}], :removed_dependencies=>[]}], :removed_manifests=>[]}})
48
+ ```
33
49
 
34
50
  ## Development
35
51
 
@@ -1,18 +1,26 @@
1
1
  module RepoMiner
2
2
  class Commit
3
3
  attr_reader :repository
4
- attr_reader :commit
4
+ attr_reader :rugged_commit
5
5
  attr_accessor :data
6
6
 
7
- def initialize(repository, commit)
7
+ def initialize(repository, rugged_commit)
8
8
  @repository = repository
9
- @commit = commit
9
+ @rugged_commit = rugged_commit
10
10
  @data = {}
11
11
  end
12
12
 
13
+ def message
14
+ rugged_commit.message.strip
15
+ end
16
+
17
+ def sha
18
+ rugged_commit.oid
19
+ end
20
+
13
21
  def analyse
14
- # for every miner (except Base) analyse commit
15
22
  Miners::Email.new.analyse(self)
23
+ Miners::Dependencies.new.analyse(self)
16
24
 
17
25
  self
18
26
  end
@@ -20,5 +28,25 @@ module RepoMiner
20
28
  def add_data(key, miner_data)
21
29
  data[key] = miner_data
22
30
  end
31
+
32
+ def content_before(file_path)
33
+ content_for_commit(rugged_commit.parents[0], file_path)
34
+ end
35
+
36
+ def content_after(file_path)
37
+ content_for_commit(rugged_commit, file_path)
38
+ end
39
+
40
+ def inspect
41
+ "RepoMiner::Commit:#{"0x00%x" % (object_id << 1)}(message: #{message}, sha: #{sha}, data: #{data})"
42
+ end
43
+
44
+ private
45
+
46
+ def content_for_commit(rugged_commit, file_path)
47
+ path = rugged_commit.tree.path(file_path)
48
+ blob = repository.rugged_repository.lookup(path[:oid])
49
+ blob.content
50
+ end
23
51
  end
24
52
  end
@@ -0,0 +1,182 @@
1
+ require 'bibliothecary'
2
+
3
+ module RepoMiner
4
+ module Miners
5
+ class Dependencies
6
+ def analyse(commit)
7
+ all_paths = blob_paths(commit.rugged_commit)
8
+
9
+ added_paths = all_paths.select{|path| path[:status] == :added }.map{|path| path[:path] }
10
+ modified_paths = all_paths.select{|path| path[:status] == :modified }.map{|path| path[:path] }
11
+ removed_paths = all_paths.select{|path| path[:status] == :deleted }.map{|path| path[:path] }
12
+
13
+ added_manifest_paths = Bibliothecary.identify_manifests(added_paths)
14
+ modified_manifest_paths = Bibliothecary.identify_manifests(modified_paths)
15
+ removed_manifest_paths = Bibliothecary.identify_manifests(removed_paths)
16
+
17
+ # don't bother analysing commits where no dependency files were touched
18
+ return nil if added_manifest_paths.empty? && modified_manifest_paths.empty? && removed_manifest_paths.empty?
19
+
20
+ # Added manifest files
21
+ added_manifests = []
22
+ added_manifest_paths.each do |manifest_path|
23
+ manifest = Bibliothecary.analyse_file(manifest_path, commit.content_after(manifest_path))
24
+
25
+ new_manifest = manifest.first
26
+
27
+ if new_manifest
28
+ dependencies = new_manifest[:dependencies]
29
+ added = dependencies.map{|d| d[:name] }
30
+
31
+ added.map! do |dep_name|
32
+ dep = dependencies.find{|d| d[:name] == dep_name }
33
+ {
34
+ name: dep_name,
35
+ requirement: dep[:requirement],
36
+ type: dep[:type]
37
+ }
38
+ end
39
+
40
+ added_manifests << {
41
+ path: manifest_path,
42
+ platform: manifest[0][:platform],
43
+ added_dependencies: added,
44
+ modified_dependencies: [],
45
+ removed_dependencies: []
46
+ }
47
+ end
48
+ end
49
+
50
+ # Modified manifest files
51
+ modified_manifests = []
52
+ modified_manifest_paths.each do |manifest_path|
53
+ before_manifest = Bibliothecary.analyse_file(manifest_path, commit.content_before(manifest_path))
54
+ before_modified_manifest = before_manifest.first
55
+
56
+ after_manifest = Bibliothecary.analyse_file(manifest_path, commit.content_after(manifest_path))
57
+ after_modified_manifest = after_manifest.first
58
+
59
+ if before_modified_manifest && after_modified_manifest
60
+ before_dependencies = before_modified_manifest[:dependencies]
61
+ after_dependencies = after_modified_manifest[:dependencies]
62
+
63
+ added_dependency_names = after_dependencies.map{|d| d[:name] } - before_dependencies.map{|d| d[:name] }
64
+ removed_dependency_names = before_dependencies.map{|d| d[:name] } - after_dependencies.map{|d| d[:name] }
65
+
66
+ potentially_modified_dependency_names = after_dependencies.map{|d| d[:name] } - added_dependency_names - removed_dependency_names
67
+ modified_dependency_names = potentially_modified_dependency_names.select do |name|
68
+ after = after_dependencies.find{|d| d[:name] == name }
69
+ before = before_dependencies.find{|d| d[:name] == name }
70
+ (after[:requirement] != before[:requirement]) || (after[:type] != before[:type])
71
+ end
72
+
73
+ # added_dependencies
74
+ added_dependencies = added_dependency_names.map do |dep_name|
75
+ dep = after_dependencies.find{|d| d[:name] == dep_name }
76
+ {
77
+ name: dep_name,
78
+ requirement: dep[:requirement],
79
+ type: dep[:type]
80
+ }
81
+ end
82
+
83
+ # modified_dependencies
84
+ modified_dependencies = modified_dependency_names.map do |dep_name|
85
+ after = after_dependencies.find{|d| d[:name] == dep_name }
86
+ before = before_dependencies.find{|d| d[:name] == dep_name }
87
+ dep_hash = {
88
+ name: dep_name,
89
+ requirement: after[:requirement],
90
+ type: after[:type]
91
+ }
92
+ dep_hash[:previous_requirement] = before[:requirement] if after[:requirement] != before[:requirement]
93
+ dep_hash[:previous_type] = before[:type] if after[:type] != before[:type]
94
+ dep_hash
95
+ end
96
+
97
+ # removed_dependencies
98
+ removed_dependencies = removed_dependency_names.map do |dep_name|
99
+ dep = before_dependencies.find{|d| d[:name] == dep_name }
100
+ {
101
+ name: dep_name,
102
+ requirement: dep[:requirement],
103
+ type: dep[:type]
104
+ }
105
+ end
106
+
107
+ modified_manifests << {
108
+ path: manifest_path,
109
+ platform: after_modified_manifest[:platform],
110
+ added_dependencies: added_dependencies,
111
+ modified_dependencies: modified_dependencies,
112
+ removed_dependencies: removed_dependencies
113
+ }
114
+ end
115
+ end
116
+
117
+ # Removed manifest files
118
+ removed_manifests = []
119
+ removed_manifest_paths.each do |manifest_path|
120
+ manifest = Bibliothecary.analyse_file(manifest_path, commit.content_before(manifest_path))
121
+
122
+ removed_manifest = manifest.first
123
+
124
+ if removed_manifest
125
+ dependencies = removed_manifest[:dependencies]
126
+ removed = dependencies.map{|d| d[:name] }
127
+
128
+ removed.map! do |dep_name|
129
+ dep = dependencies.find{|d| d[:name] == dep_name }
130
+ {
131
+ name: dep_name,
132
+ requirement: dep[:requirement],
133
+ type: dep[:type]
134
+ }
135
+ end
136
+
137
+ removed_manifests << {
138
+ path: manifest_path,
139
+ platform: manifest[0][:platform],
140
+ added_dependencies: [],
141
+ modified_dependencies: [],
142
+ removed_dependencies: removed
143
+ }
144
+ end
145
+ end
146
+
147
+ data = {
148
+ added_manifests: added_manifests,
149
+ modified_manifests: modified_manifests,
150
+ removed_manifests: removed_manifests
151
+ }
152
+
153
+ commit.add_data(:dependencies, data)
154
+ end
155
+
156
+ private
157
+
158
+ def blob_paths(commit)
159
+ paths = []
160
+
161
+ if commit.parents.count == 0 # initial commit
162
+ commit.tree.walk_blobs(:postorder) do |root, entry|
163
+ paths << {
164
+ status: :added,
165
+ path: "#{root}#{entry[:name]}"
166
+ }
167
+ end
168
+ else
169
+ diffs = commit.parents[0].diff(commit)
170
+
171
+ diffs.each_delta do |delta|
172
+ paths << {
173
+ status: delta.status,
174
+ path: delta.new_file[:path]
175
+ }
176
+ end
177
+ end
178
+ paths
179
+ end
180
+ end
181
+ end
182
+ end
@@ -3,17 +3,14 @@ module RepoMiner
3
3
  class Email
4
4
  def analyse(commit)
5
5
  # analyse commit
6
- committer_email = commit.commit.committer[:email]
7
- author_email = commit.commit.author[:email]
6
+ committer_email = commit.rugged_commit.committer[:email]
7
+ author_email = commit.rugged_commit.author[:email]
8
8
 
9
9
  # attach mined info to commit
10
10
  commit.add_data(:email, {
11
11
  committer: committer_email,
12
12
  author: author_email
13
13
  })
14
-
15
- # return the commit
16
- commit
17
14
  end
18
15
  end
19
16
  end
@@ -7,14 +7,14 @@ module RepoMiner
7
7
  @repo_path = repo_path
8
8
  end
9
9
 
10
- def repository
10
+ def rugged_repository
11
11
  @repository ||= Rugged::Repository.new(repo_path)
12
12
  end
13
13
 
14
14
  def walk(branch)
15
- @walker = Rugged::Walker.new(repository)
15
+ @walker = Rugged::Walker.new(rugged_repository)
16
16
  @walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE)
17
- @walker.push(repository.branches[branch].target_id)
17
+ @walker.push(rugged_repository.branches[branch].target_id)
18
18
  @walker
19
19
  end
20
20
 
@@ -1,3 +1,3 @@
1
1
  module RepoMiner
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/repo_miner.gemspec CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
11
11
 
12
12
  spec.summary = "Mine repositories for interesting changes over time"
13
13
  spec.homepage = "https://github.com/librariesio/repo_miner"
14
- spec.license = "APGL-3.0"
14
+ spec.license = "AGPL-3.0"
15
15
 
16
16
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
17
17
  f.match(%r{^(test|spec|features)/})
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.require_paths = ["lib"]
22
22
 
23
23
  spec.add_dependency "rugged"
24
+ spec.add_dependency "bibliothecary"
24
25
 
25
26
  spec.add_development_dependency "bundler", "~> 1.14"
26
27
  spec.add_development_dependency "rake", "~> 12.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: repo_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Nesbitt
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-13 00:00:00.000000000 Z
11
+ date: 2017-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rugged
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bibliothecary
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -86,13 +100,14 @@ files:
86
100
  - bin/setup
87
101
  - lib/repo_miner.rb
88
102
  - lib/repo_miner/commit.rb
103
+ - lib/repo_miner/miners/dependencies.rb
89
104
  - lib/repo_miner/miners/email.rb
90
105
  - lib/repo_miner/repository.rb
91
106
  - lib/repo_miner/version.rb
92
107
  - repo_miner.gemspec
93
108
  homepage: https://github.com/librariesio/repo_miner
94
109
  licenses:
95
- - APGL-3.0
110
+ - AGPL-3.0
96
111
  metadata: {}
97
112
  post_install_message:
98
113
  rdoc_options: []