repo_miner 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5bc0a4630da25d7d089012de9d1e954f4c8b3521
4
- data.tar.gz: 87d1fa6593047e0a52e0dd460d8c32caafc51aee
3
+ metadata.gz: f8e005fe499b1896489c3f607ac01c8865dd5bba
4
+ data.tar.gz: daa6c769d3dd9112c1e4266e2d3f32583292d0ea
5
5
  SHA512:
6
- metadata.gz: dd822f039d5b4e025e03e61e3fc08158b4ec8d93ee3e088a3c6c2a726d430d989661262966d65f3169ee62c24596cb70d7611ab91c85fa14f454cc5931514b57
7
- data.tar.gz: 945d1f2af6d73d6db682206062ef049a599be14876ba74515767967066b38afa6d5c449591194c768f7d49ad3463cadb57ced6c41de526a3e889bd37b38b9f2c
6
+ metadata.gz: b9fb75bda604333a55de49e7bd1e92d8bc728b8ab0b73139a7ea01bf76e289444d0ed223948c2f78890d93f9fc9a06905664518d550e834609f84f56fc5e23f4
7
+ data.tar.gz: b85aabbad7cbb291cee1b4d56c57b68ed69f874ff7b5bdd7fd2daef6375b3201bd84cc99905796281aae394c381f1cbf6cd33d3145749eb0a92a29f132f97ff9
data/README.md CHANGED
@@ -29,7 +29,23 @@ Or install it yourself as:
29
29
 
30
30
  ## Usage
31
31
 
32
- TODO: Write usage instructions here
32
+ Make a new repository:
33
+
34
+ ```ruby
35
+ repository = RepoMiner::Repository.new('/path/to/git/repo')
36
+ ```
37
+
38
+ Analyse all commits for a given branch:
39
+
40
+ ```ruby
41
+ commits = repository.analyse('master')
42
+ ```
43
+
44
+ See mined dependency data for a given commit:
45
+
46
+ ```ruby
47
+ commits.last #=> RepoMiner::Commit:0x007fd87fdf1150(message: "Fixes 1597", sha: c656e48ada19c6c83f7705893f0a73cfc1844abf, data: {:email=>{:committer=>"andrewnez@gmail.com", :author=>"andrewnez@gmail.com"}, :dependencies=>{:added_manifests=>[], :modified_manifests=>[{:path=>"Gemfile", :platform=>"rubygems", :added_dependencies=>[], :modified_dependencies=>[], :removed_dependencies=>[{:name=>"sass", :requirement=>"= 3.4.24", :type=>:runtime}]}, {:path=>"Gemfile.lock", :platform=>"rubygems", :added_dependencies=>[{:name=>"sass-listen", :requirement=>"4.0.0", :type=>"runtime"}], :modified_dependencies=>[{:name=>"commonmarker", :requirement=>"0.16.8", :type=>"runtime", :previous_requirement=>"0.16.7"}, {:name=>"gitlab", :requirement=>"4.2.0", :type=>"runtime", :previous_requirement=>"4.1.0"}, {:name=>"rack-cors", :requirement=>"1.0.0", :type=>"runtime", :previous_requirement=>"0.4.1"}, {:name=>"sass", :requirement=>"3.5.1", :type=>"runtime", :previous_requirement=>"3.4.24"}, {:name=>"sassc", :requirement=>"1.11.4", :type=>"runtime", :previous_requirement=>"1.11.2"}], :removed_dependencies=>[]}], :removed_manifests=>[]}})
48
+ ```
33
49
 
34
50
  ## Development
35
51
 
@@ -1,18 +1,26 @@
1
1
  module RepoMiner
2
2
  class Commit
3
3
  attr_reader :repository
4
- attr_reader :commit
4
+ attr_reader :rugged_commit
5
5
  attr_accessor :data
6
6
 
7
- def initialize(repository, commit)
7
+ def initialize(repository, rugged_commit)
8
8
  @repository = repository
9
- @commit = commit
9
+ @rugged_commit = rugged_commit
10
10
  @data = {}
11
11
  end
12
12
 
13
+ def message
14
+ rugged_commit.message.strip
15
+ end
16
+
17
+ def sha
18
+ rugged_commit.oid
19
+ end
20
+
13
21
  def analyse
14
- # for every miner (except Base) analyse commit
15
22
  Miners::Email.new.analyse(self)
23
+ Miners::Dependencies.new.analyse(self)
16
24
 
17
25
  self
18
26
  end
@@ -20,5 +28,25 @@ module RepoMiner
20
28
  def add_data(key, miner_data)
21
29
  data[key] = miner_data
22
30
  end
31
+
32
+ def content_before(file_path)
33
+ content_for_commit(rugged_commit.parents[0], file_path)
34
+ end
35
+
36
+ def content_after(file_path)
37
+ content_for_commit(rugged_commit, file_path)
38
+ end
39
+
40
+ def inspect
41
+ "RepoMiner::Commit:#{"0x00%x" % (object_id << 1)}(message: #{message}, sha: #{sha}, data: #{data})"
42
+ end
43
+
44
+ private
45
+
46
+ def content_for_commit(rugged_commit, file_path)
47
+ path = rugged_commit.tree.path(file_path)
48
+ blob = repository.rugged_repository.lookup(path[:oid])
49
+ blob.content
50
+ end
23
51
  end
24
52
  end
@@ -0,0 +1,182 @@
1
+ require 'bibliothecary'
2
+
3
+ module RepoMiner
4
+ module Miners
5
+ class Dependencies
6
+ def analyse(commit)
7
+ all_paths = blob_paths(commit.rugged_commit)
8
+
9
+ added_paths = all_paths.select{|path| path[:status] == :added }.map{|path| path[:path] }
10
+ modified_paths = all_paths.select{|path| path[:status] == :modified }.map{|path| path[:path] }
11
+ removed_paths = all_paths.select{|path| path[:status] == :deleted }.map{|path| path[:path] }
12
+
13
+ added_manifest_paths = Bibliothecary.identify_manifests(added_paths)
14
+ modified_manifest_paths = Bibliothecary.identify_manifests(modified_paths)
15
+ removed_manifest_paths = Bibliothecary.identify_manifests(removed_paths)
16
+
17
+ # don't bother analysing commits where no dependency files were touched
18
+ return nil if added_manifest_paths.empty? && modified_manifest_paths.empty? && removed_manifest_paths.empty?
19
+
20
+ # Added manifest files
21
+ added_manifests = []
22
+ added_manifest_paths.each do |manifest_path|
23
+ manifest = Bibliothecary.analyse_file(manifest_path, commit.content_after(manifest_path))
24
+
25
+ new_manifest = manifest.first
26
+
27
+ if new_manifest
28
+ dependencies = new_manifest[:dependencies]
29
+ added = dependencies.map{|d| d[:name] }
30
+
31
+ added.map! do |dep_name|
32
+ dep = dependencies.find{|d| d[:name] == dep_name }
33
+ {
34
+ name: dep_name,
35
+ requirement: dep[:requirement],
36
+ type: dep[:type]
37
+ }
38
+ end
39
+
40
+ added_manifests << {
41
+ path: manifest_path,
42
+ platform: manifest[0][:platform],
43
+ added_dependencies: added,
44
+ modified_dependencies: [],
45
+ removed_dependencies: []
46
+ }
47
+ end
48
+ end
49
+
50
+ # Modified manifest files
51
+ modified_manifests = []
52
+ modified_manifest_paths.each do |manifest_path|
53
+ before_manifest = Bibliothecary.analyse_file(manifest_path, commit.content_before(manifest_path))
54
+ before_modified_manifest = before_manifest.first
55
+
56
+ after_manifest = Bibliothecary.analyse_file(manifest_path, commit.content_after(manifest_path))
57
+ after_modified_manifest = after_manifest.first
58
+
59
+ if before_modified_manifest && after_modified_manifest
60
+ before_dependencies = before_modified_manifest[:dependencies]
61
+ after_dependencies = after_modified_manifest[:dependencies]
62
+
63
+ added_dependency_names = after_dependencies.map{|d| d[:name] } - before_dependencies.map{|d| d[:name] }
64
+ removed_dependency_names = before_dependencies.map{|d| d[:name] } - after_dependencies.map{|d| d[:name] }
65
+
66
+ potentially_modified_dependency_names = after_dependencies.map{|d| d[:name] } - added_dependency_names - removed_dependency_names
67
+ modified_dependency_names = potentially_modified_dependency_names.select do |name|
68
+ after = after_dependencies.find{|d| d[:name] == name }
69
+ before = before_dependencies.find{|d| d[:name] == name }
70
+ (after[:requirement] != before[:requirement]) || (after[:type] != before[:type])
71
+ end
72
+
73
+ # added_dependencies
74
+ added_dependencies = added_dependency_names.map do |dep_name|
75
+ dep = after_dependencies.find{|d| d[:name] == dep_name }
76
+ {
77
+ name: dep_name,
78
+ requirement: dep[:requirement],
79
+ type: dep[:type]
80
+ }
81
+ end
82
+
83
+ # modified_dependencies
84
+ modified_dependencies = modified_dependency_names.map do |dep_name|
85
+ after = after_dependencies.find{|d| d[:name] == dep_name }
86
+ before = before_dependencies.find{|d| d[:name] == dep_name }
87
+ dep_hash = {
88
+ name: dep_name,
89
+ requirement: after[:requirement],
90
+ type: after[:type]
91
+ }
92
+ dep_hash[:previous_requirement] = before[:requirement] if after[:requirement] != before[:requirement]
93
+ dep_hash[:previous_type] = before[:type] if after[:type] != before[:type]
94
+ dep_hash
95
+ end
96
+
97
+ # removed_dependencies
98
+ removed_dependencies = removed_dependency_names.map do |dep_name|
99
+ dep = before_dependencies.find{|d| d[:name] == dep_name }
100
+ {
101
+ name: dep_name,
102
+ requirement: dep[:requirement],
103
+ type: dep[:type]
104
+ }
105
+ end
106
+
107
+ modified_manifests << {
108
+ path: manifest_path,
109
+ platform: after_modified_manifest[:platform],
110
+ added_dependencies: added_dependencies,
111
+ modified_dependencies: modified_dependencies,
112
+ removed_dependencies: removed_dependencies
113
+ }
114
+ end
115
+ end
116
+
117
+ # Removed manifest files
118
+ removed_manifests = []
119
+ removed_manifest_paths.each do |manifest_path|
120
+ manifest = Bibliothecary.analyse_file(manifest_path, commit.content_before(manifest_path))
121
+
122
+ removed_manifest = manifest.first
123
+
124
+ if removed_manifest
125
+ dependencies = removed_manifest[:dependencies]
126
+ removed = dependencies.map{|d| d[:name] }
127
+
128
+ removed.map! do |dep_name|
129
+ dep = dependencies.find{|d| d[:name] == dep_name }
130
+ {
131
+ name: dep_name,
132
+ requirement: dep[:requirement],
133
+ type: dep[:type]
134
+ }
135
+ end
136
+
137
+ removed_manifests << {
138
+ path: manifest_path,
139
+ platform: manifest[0][:platform],
140
+ added_dependencies: [],
141
+ modified_dependencies: [],
142
+ removed_dependencies: removed
143
+ }
144
+ end
145
+ end
146
+
147
+ data = {
148
+ added_manifests: added_manifests,
149
+ modified_manifests: modified_manifests,
150
+ removed_manifests: removed_manifests
151
+ }
152
+
153
+ commit.add_data(:dependencies, data)
154
+ end
155
+
156
+ private
157
+
158
+ def blob_paths(commit)
159
+ paths = []
160
+
161
+ if commit.parents.count == 0 # initial commit
162
+ commit.tree.walk_blobs(:postorder) do |root, entry|
163
+ paths << {
164
+ status: :added,
165
+ path: "#{root}#{entry[:name]}"
166
+ }
167
+ end
168
+ else
169
+ diffs = commit.parents[0].diff(commit)
170
+
171
+ diffs.each_delta do |delta|
172
+ paths << {
173
+ status: delta.status,
174
+ path: delta.new_file[:path]
175
+ }
176
+ end
177
+ end
178
+ paths
179
+ end
180
+ end
181
+ end
182
+ end
@@ -3,17 +3,14 @@ module RepoMiner
3
3
  class Email
4
4
  def analyse(commit)
5
5
  # analyse commit
6
- committer_email = commit.commit.committer[:email]
7
- author_email = commit.commit.author[:email]
6
+ committer_email = commit.rugged_commit.committer[:email]
7
+ author_email = commit.rugged_commit.author[:email]
8
8
 
9
9
  # attach mined info to commit
10
10
  commit.add_data(:email, {
11
11
  committer: committer_email,
12
12
  author: author_email
13
13
  })
14
-
15
- # return the commit
16
- commit
17
14
  end
18
15
  end
19
16
  end
@@ -7,14 +7,14 @@ module RepoMiner
7
7
  @repo_path = repo_path
8
8
  end
9
9
 
10
- def repository
10
+ def rugged_repository
11
11
  @repository ||= Rugged::Repository.new(repo_path)
12
12
  end
13
13
 
14
14
  def walk(branch)
15
- @walker = Rugged::Walker.new(repository)
15
+ @walker = Rugged::Walker.new(rugged_repository)
16
16
  @walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE)
17
- @walker.push(repository.branches[branch].target_id)
17
+ @walker.push(rugged_repository.branches[branch].target_id)
18
18
  @walker
19
19
  end
20
20
 
@@ -1,3 +1,3 @@
1
1
  module RepoMiner
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/repo_miner.gemspec CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
11
11
 
12
12
  spec.summary = "Mine repositories for interesting changes over time"
13
13
  spec.homepage = "https://github.com/librariesio/repo_miner"
14
- spec.license = "APGL-3.0"
14
+ spec.license = "AGPL-3.0"
15
15
 
16
16
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
17
17
  f.match(%r{^(test|spec|features)/})
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.require_paths = ["lib"]
22
22
 
23
23
  spec.add_dependency "rugged"
24
+ spec.add_dependency "bibliothecary"
24
25
 
25
26
  spec.add_development_dependency "bundler", "~> 1.14"
26
27
  spec.add_development_dependency "rake", "~> 12.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: repo_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Nesbitt
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-13 00:00:00.000000000 Z
11
+ date: 2017-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rugged
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bibliothecary
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -86,13 +100,14 @@ files:
86
100
  - bin/setup
87
101
  - lib/repo_miner.rb
88
102
  - lib/repo_miner/commit.rb
103
+ - lib/repo_miner/miners/dependencies.rb
89
104
  - lib/repo_miner/miners/email.rb
90
105
  - lib/repo_miner/repository.rb
91
106
  - lib/repo_miner/version.rb
92
107
  - repo_miner.gemspec
93
108
  homepage: https://github.com/librariesio/repo_miner
94
109
  licenses:
95
- - APGL-3.0
110
+ - AGPL-3.0
96
111
  metadata: {}
97
112
  post_install_message:
98
113
  rdoc_options: []