github-pulse 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,247 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "time"
5
+ require "open3"
6
+
7
module Github
  module Pulse
    # Fetches repository metrics by shelling out to the `gh` CLI.
    # `gh` manages its own authentication, so no token handling happens here;
    # every command runs through Open3 (argv form, never a shell string).
    class GhClient
      # States `gh pr list --state` accepts besides "all".
      PR_STATES = %w[open closed merged].freeze

      attr_reader :repo

      # @param repo [String] repository slug in "owner/name" form
      def initialize(repo:)
        @repo = repo
        validate_gh_cli!
      end

      # True when the gh CLI is installed and authenticated.
      def available?
        @gh_available
      end

      # Lists pull requests as normalized hashes.
      #
      # @param since [String, nil] keep PRs created at/after this time (Time.parse-able)
      # @param until_date [String, nil] keep PRs created at/before this time
      # @param state [String] "open", "closed", "merged" or "all"
      # @return [Array<Hash>]
      def pull_requests(since: nil, until_date: nil, state: "all")
        fields = "number,title,author,createdAt,closedAt,mergedAt,state,additions,deletions,changedFiles"

        # `gh pr list` supports --state all directly. Issuing separate
        # open/closed/merged calls would double-count merged PRs, because
        # gh's "closed" filter also matches merged ones.
        effective_state = PR_STATES.include?(state) ? state : "all"
        cmd = ["gh", "pr", "list", "--repo", repo, "--limit", "1000",
               "--json", fields, "--state", effective_state]

        prs = execute_gh_command(cmd)

        # gh has no server-side date filter, so filter locally on createdAt.
        if since || until_date
          prs = filter_by_date(prs, since, until_date) { |pr| Time.parse(pr["createdAt"]) }
        end

        prs.map do |pr|
          {
            number: pr["number"],
            title: pr["title"],
            author: pr.dig("author", "login") || "unknown",
            created_at: parse_time(pr["createdAt"]),
            closed_at: parse_time(pr["closedAt"]),
            merged_at: parse_time(pr["mergedAt"]),
            state: pr["state"].downcase,
            additions: pr["additions"] || 0,
            deletions: pr["deletions"] || 0,
            changed_files: pr["changedFiles"] || 0
          }
        end
      end

      # Basic repository metadata, or nil when the repo cannot be resolved.
      # @return [Hash, nil]
      def repository_info
        fields = "name,nameWithOwner,description,createdAt,updatedAt,primaryLanguage,defaultBranchRef,diskUsage,stargazerCount,forkCount,issues"
        data = execute_gh_command(["gh", "repo", "view", repo, "--json", fields])
        return nil if data.nil? || data.empty?

        # Defensive: unwrap if the payload arrives as a one-element array.
        data = data.first if data.is_a?(Array)

        {
          name: data["name"],
          full_name: data["nameWithOwner"],
          description: data["description"],
          created_at: parse_time(data["createdAt"]),
          updated_at: parse_time(data["updatedAt"]),
          language: data.dig("primaryLanguage", "name"),
          default_branch: data.dig("defaultBranchRef", "name"),
          size: data["diskUsage"],
          stars: data["stargazerCount"],
          forks: data["forkCount"],
          open_issues: data.dig("issues", "totalCount") || 0
        }
      end

      # Weekly commit counts for roughly the last year.
      #
      # gh has no direct equivalent of GitHub's commit-activity stats endpoint,
      # so recent commits are fetched and bucketed by week (Monday start).
      # @return [Array<Hash>] each {week_start:, days:, total:}
      def commit_activity
        since_date = (Date.today - 52 * 7).to_s # one year back

        # per_page=100 asks for the API maximum per page; without pagination
        # this remains an approximation for very active repositories.
        url = "repos/#{repo}/commits?since=#{since_date}&per_page=100"
        commits = execute_gh_command(["gh", "api", url])
        return [] unless commits.is_a?(Array)

        activity = Hash.new(0)
        commits.each do |commit|
          date_str = commit.dig("commit", "author", "date")
          next unless date_str

          date = Date.parse(date_str)
          week_start = date - date.cwday + 1 # Monday of that ISO week
          activity[week_start] += 1
        end

        activity.map do |week_start, count|
          # The commits list gives no daily granularity, so the weekly total
          # is attributed entirely to the first day slot.
          days = [0] * 7
          days[0] = count

          { week_start: week_start, days: days, total: count }
        end
      end

      # Commits grouped by author login (falling back to commit email).
      #
      # @param since [String, nil] ISO-8601 lower bound passed to the API
      # @param until_date [String, nil] ISO-8601 upper bound passed to the API
      # @return [Hash{String => Array<Hash>}]
      def commits_data(since: nil, until_date: nil)
        params = []
        params << "since=#{since}" if since
        params << "until=#{until_date}" if until_date

        url = "repos/#{repo}/commits"
        url += "?#{params.join('&')}" unless params.empty?

        commits = execute_gh_command(["gh", "api", url])

        commits_by_author = Hash.new { |h, k| h[k] = [] }
        # Guard against error payloads that are not arrays.
        return commits_by_author unless commits.is_a?(Array)

        commits.each do |commit|
          author = commit.dig("author", "login") ||
                   commit.dig("commit", "author", "email") ||
                   "unknown"

          commits_by_author[author] << {
            sha: commit["sha"],
            message: commit.dig("commit", "message")&.lines&.first&.strip,
            time: parse_time(commit.dig("commit", "author", "date")),
            additions: 0, # the list endpoint carries no diff stats;
            deletions: 0  # fetching them would need one API call per commit
          }
        end

        commits_by_author
      end

      # Per-contributor weekly statistics from the stats API.
      # @return [Array<Hash>]
      def contributors_stats
        # --cache avoids re-hitting the slow, 202-prone stats endpoint.
        cmd = ["gh", "api", "repos/#{repo}/stats/contributors", "--cache", "1h"]
        stats = execute_gh_command(cmd)
        return [] unless stats.is_a?(Array)

        stats.map do |contributor|
          {
            author: contributor.dig("author", "login") || "unknown",
            total_commits: contributor["total"],
            weeks: (contributor["weeks"] || []).map do |week|
              {
                week_start: Time.at(week["w"]).to_date,
                additions: week["a"],
                deletions: week["d"],
                commits: week["c"]
              }
            end
          }
        end
      end

      private

      # Sets @gh_available: true only when gh is both installed and authenticated.
      def validate_gh_cli!
        _, _, status = Open3.capture3("which", "gh")
        unless status.success?
          @gh_available = false
          return
        end

        # `gh auth status` prints to stderr, so also accept a "Logged in"
        # message even when the exit status is non-zero.
        _, stderr, status = Open3.capture3("gh", "auth", "status")
        @gh_available = status.success? || stderr.include?("Logged in")
      rescue StandardError
        @gh_available = false
      end

      # Runs a gh command (argv array) and parses its JSON output.
      #
      # Prefers whatever stdout was produced — gh can emit usable JSON even
      # with a non-zero exit (e.g. partial pagination). Returns [] for
      # 404-style misses; raises Error for other failures with empty output.
      def execute_gh_command(cmd)
        stdout, stderr, status = Open3.capture3(*cmd)

        unless stdout.strip.empty?
          begin
            return JSON.parse(stdout)
          rescue JSON::ParserError
            # Some `gh api` calls emit newline-delimited JSON objects.
            return stdout.lines.map { |line|
              begin
                JSON.parse(line.strip)
              rescue JSON::ParserError
                nil
              end
            }.compact
          end
        end

        unless status.success?
          return [] if stderr.include?("HTTP 404") || stderr.include?("not found")

          raise Error, "gh command failed: #{stderr}"
        end

        []
      end

      # Keeps items whose yielded timestamp lies within [since, until_date].
      # Items for which the block yields nil are dropped.
      def filter_by_date(items, since, until_date)
        items.select do |item|
          date = yield(item)
          next false unless date

          in_range = true
          in_range &&= date >= Time.parse(since) if since
          in_range &&= date <= Time.parse(until_date) if until_date
          in_range
        end
      end

      # Time.parse that returns nil for nil or unparseable input.
      def parse_time(time_str)
        return nil unless time_str

        Time.parse(time_str)
      rescue StandardError
        nil
      end
    end
  end
end
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rugged"
4
+ require "time"
5
+
6
module Github
  module Pulse
    # Local git-history analysis backed by Rugged (libgit2 bindings).
    class GitAnalyzer
      # Extension-based skip list for blame analysis (see #binary_file?).
      BINARY_EXTENSIONS = %w[.jpg .jpeg .png .gif .pdf .zip .tar .gz
                             .exe .dll .so .dylib .o .a].freeze

      attr_reader :repository, :repo_path

      # @param repo_path [String] path to a working tree or .git directory
      # @raise [Error] when the path is not a usable git repository
      def initialize(repo_path)
        @repo_path = repo_path
        @repository = Rugged::Repository.new(repo_path)
      # Rugged raises OSError (not RepositoryError) for nonexistent paths.
      rescue Rugged::RepositoryError, Rugged::OSError
        raise Error, "Not a valid git repository: #{repo_path}"
      end

      # Walks HEAD history and groups commits by author email.
      #
      # @param since [String, nil] drop commits older than this time
      # @param until_date [String, nil] drop commits newer than this time
      # @return [Hash{String => Array<Hash>}] email => commit hashes
      def analyze_commits(since: nil, until_date: nil)
        # Parse the bounds once, outside the walk loop.
        since_time = since && Time.parse(since)
        until_time = until_date && Time.parse(until_date)

        walker = Rugged::Walker.new(repository)
        # Sort by commit time so the early `break` below cannot skip
        # in-range commits that appear late in topological order.
        walker.sorting(Rugged::SORT_DATE)
        walker.push(repository.head.target_id)

        commits_by_author = Hash.new { |h, k| h[k] = [] }

        walker.each do |commit|
          commit_time = Time.at(commit.time)

          # Walking newest-first: once we pass `since`, the rest is older.
          break if since_time && commit_time < since_time
          next if until_time && commit_time > until_time

          commits_by_author[commit.author[:email]] << {
            sha: commit.oid,
            message: commit.message.lines.first&.strip,
            time: commit_time,
            additions: 0, # filled in by the second pass below
            deletions: 0
          }
        end

        # Diff stats require a per-commit lookup, done as a second pass.
        commits_by_author.each_value do |commits|
          commits.each do |commit_data|
            stats = calculate_commit_stats(commit_data[:sha])
            commit_data[:additions] = stats[:additions]
            commit_data[:deletions] = stats[:deletions]
          end
        end

        commits_by_author
      end

      # Lines currently attributed to each author, via blame over every
      # non-binary blob reachable from HEAD.
      # @return [Hash{String => Integer}] email => line count
      def lines_of_code_by_author
        blame_data = Hash.new(0)

        repository.head.target.tree.walk(:preorder) do |root, entry|
          next unless entry[:type] == :blob

          file_path = root.empty? ? entry[:name] : "#{root}/#{entry[:name]}"
          next if binary_file?(file_path)

          begin
            Rugged::Blame.new(repository, file_path).each do |hunk|
              blame_data[hunk[:final_signature][:email]] += hunk[:lines_in_hunk]
            end
          rescue StandardError
            # Skip anything blame cannot process (e.g. submodule entries).
            next
          end
        end

        blame_data
      end

      # Commit counts per calendar day over the full HEAD history,
      # sorted by date ascending.
      # @return [Hash{Date => Integer}]
      def commit_activity_by_day
        walker = Rugged::Walker.new(repository)
        walker.push(repository.head.target_id)

        activity = Hash.new(0)
        walker.each do |commit|
          activity[Time.at(commit.time).to_date] += 1
        end

        activity.sort.to_h
      end

      # "owner/name" slug of the origin remote, or nil when there is no
      # origin or it does not point at GitHub.
      # @return [String, nil]
      def remote_url
        origin = repository.remotes["origin"]
        return nil unless origin

        extract_github_repo(origin.url)
      end

      private

      # Additions/deletions introduced by a commit: diff against the first
      # parent, or — for a root commit — count every blob line as an addition.
      # Returns zeros when the commit cannot be analyzed.
      def calculate_commit_stats(sha)
        commit = repository.lookup(sha)
        stats = { additions: 0, deletions: 0 }

        if commit.parents.empty?
          commit.tree.walk(:preorder) do |_root, entry|
            next unless entry[:type] == :blob

            stats[:additions] += repository.lookup(entry[:oid]).content.lines.count
          end
        else
          commit.parents.first.diff(commit).each_patch do |patch|
            patch.hunks.each do |hunk|
              hunk.lines.each do |line|
                case line.line_origin
                when :addition then stats[:additions] += 1
                when :deletion then stats[:deletions] += 1
                end
              end
            end
          end
        end

        stats
      rescue StandardError
        { additions: 0, deletions: 0 }
      end

      # Cheap extension-based heuristic for files to skip when blaming.
      def binary_file?(path)
        BINARY_EXTENSIONS.any? { |ext| path.downcase.end_with?(ext) }
      end

      # Extracts "owner/name" from HTTPS or SSH GitHub remote URLs;
      # nil for anything else.
      def extract_github_repo(url)
        patterns = [
          %r{github\.com[:/]([^/]+/[^/]+?)(?:\.git)?$},
          %r{git@github\.com:([^/]+/[^/]+?)(?:\.git)?$}
        ]

        patterns.each do |pattern|
          match = url.match(pattern)
          return match[1] if match
        end

        nil
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "octokit"
4
+ require "time"
5
+
6
module Github
  module Pulse
    # GitHub REST API client backed by Octokit.
    class GithubClient
      # Maximum polls of a stats endpoint that keeps answering 202 Accepted.
      MAX_STATS_RETRIES = 5

      attr_reader :client, :repo

      # @param repo [String] "owner/name" slug
      # @param token [String, nil] optional access token for auth/rate limits
      def initialize(repo:, token: nil)
        @repo = repo
        @client = if token
                    Octokit::Client.new(access_token: token)
                  else
                    Octokit::Client.new
                  end
        @client.auto_paginate = true
      end

      # Lists pull requests, optionally filtered by creation date.
      #
      # @param since [String, nil] lower bound on created_at (Time.parse-able)
      # @param until_date [String, nil] upper bound on created_at
      # @param state [String] "open", "closed" or "all"
      # @return [Array<Hash>]
      def pull_requests(since: nil, until_date: nil, state: "all")
        prs = client.pull_requests(repo, state: state)

        if since || until_date
          prs = filter_by_date(prs, since, until_date, &:created_at)
        end

        prs.map do |pr|
          {
            number: pr.number,
            title: pr.title,
            # Deleted ("ghost") accounts have no user object attached.
            author: pr.user&.login || "unknown",
            created_at: pr.created_at,
            closed_at: pr.closed_at,
            merged_at: pr.merged_at,
            state: pr.state,
            additions: pr.additions || 0,
            deletions: pr.deletions || 0,
            changed_files: pr.changed_files || 0
          }
        end
      end

      # Per-contributor weekly statistics.
      # Returns [] if GitHub has not finished computing stats after a
      # bounded number of polls.
      # @return [Array<Hash>]
      def contributors_stats
        stats = with_stats_retry { client.contributors_stats(repo) }
        return [] unless stats

        stats.map do |contributor|
          {
            author: contributor.author&.login || "unknown",
            total_commits: contributor.total,
            weeks: contributor.weeks.map do |week|
              {
                week_start: Time.at(week.w).to_date,
                additions: week.a,
                deletions: week.d,
                commits: week.c
              }
            end
          }
        end
      end

      # Weekly commit activity for the last year.
      # Returns [] if stats are still being computed after a bounded
      # number of polls.
      # @return [Array<Hash>]
      def commit_activity
        activity = with_stats_retry { client.commit_activity_stats(repo) }
        return [] unless activity

        activity.map do |week|
          {
            week_start: Time.at(week.week).to_date,
            days: week.days,
            total: week.total
          }
        end
      end

      # Repository metadata.
      # @return [Hash]
      def repository_info
        repo_data = client.repository(repo)
        {
          name: repo_data.name,
          full_name: repo_data.full_name,
          description: repo_data.description,
          created_at: repo_data.created_at,
          updated_at: repo_data.updated_at,
          language: repo_data.language,
          default_branch: repo_data.default_branch,
          size: repo_data.size,
          stars: repo_data.stargazers_count,
          forks: repo_data.forks_count,
          open_issues: repo_data.open_issues_count
        }
      end

      private

      # Runs the block, retrying when GitHub answers 202 Accepted (stats are
      # computed asynchronously on their side). Gives up and returns nil
      # after MAX_STATS_RETRIES attempts instead of retrying forever.
      def with_stats_retry
        attempts = 0
        begin
          yield
        rescue Octokit::Accepted
          attempts += 1
          return nil if attempts >= MAX_STATS_RETRIES

          sleep 2
          retry
        end
      end

      # Keeps items whose yielded timestamp lies within [since, until_date].
      # Items for which the block yields nil are dropped.
      def filter_by_date(items, since, until_date)
        items.select do |item|
          date = yield(item)
          next false unless date

          in_range = true
          in_range &&= date >= Time.parse(since) if since
          in_range &&= date <= Time.parse(until_date) if until_date
          in_range
        end
      end
    end
  end
end