archsight 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -5
- data/lib/archsight/analysis/executor.rb +112 -0
- data/lib/archsight/analysis/result.rb +174 -0
- data/lib/archsight/analysis/sandbox.rb +319 -0
- data/lib/archsight/analysis.rb +11 -0
- data/lib/archsight/annotations/architecture_annotations.rb +2 -2
- data/lib/archsight/cli.rb +163 -0
- data/lib/archsight/database.rb +6 -2
- data/lib/archsight/helpers/analysis_renderer.rb +83 -0
- data/lib/archsight/helpers/formatting.rb +95 -0
- data/lib/archsight/helpers.rb +20 -4
- data/lib/archsight/import/concurrent_progress.rb +341 -0
- data/lib/archsight/import/executor.rb +466 -0
- data/lib/archsight/import/git_analytics.rb +626 -0
- data/lib/archsight/import/handler.rb +263 -0
- data/lib/archsight/import/handlers/github.rb +161 -0
- data/lib/archsight/import/handlers/gitlab.rb +202 -0
- data/lib/archsight/import/handlers/jira_base.rb +189 -0
- data/lib/archsight/import/handlers/jira_discover.rb +161 -0
- data/lib/archsight/import/handlers/jira_metrics.rb +179 -0
- data/lib/archsight/import/handlers/openapi_schema_parser.rb +279 -0
- data/lib/archsight/import/handlers/repository.rb +439 -0
- data/lib/archsight/import/handlers/rest_api.rb +293 -0
- data/lib/archsight/import/handlers/rest_api_index.rb +183 -0
- data/lib/archsight/import/progress.rb +91 -0
- data/lib/archsight/import/registry.rb +54 -0
- data/lib/archsight/import/shared_file_writer.rb +67 -0
- data/lib/archsight/import/team_matcher.rb +195 -0
- data/lib/archsight/import.rb +14 -0
- data/lib/archsight/resources/analysis.rb +91 -0
- data/lib/archsight/resources/application_component.rb +2 -2
- data/lib/archsight/resources/application_service.rb +12 -12
- data/lib/archsight/resources/business_product.rb +12 -12
- data/lib/archsight/resources/data_object.rb +1 -1
- data/lib/archsight/resources/import.rb +79 -0
- data/lib/archsight/resources/technology_artifact.rb +23 -2
- data/lib/archsight/version.rb +1 -1
- data/lib/archsight/web/api/docs.rb +17 -0
- data/lib/archsight/web/api/json_helpers.rb +164 -0
- data/lib/archsight/web/api/openapi/spec.yaml +500 -0
- data/lib/archsight/web/api/routes.rb +101 -0
- data/lib/archsight/web/application.rb +66 -43
- data/lib/archsight/web/doc/import.md +458 -0
- data/lib/archsight/web/doc/index.md.erb +1 -0
- data/lib/archsight/web/public/css/artifact.css +10 -0
- data/lib/archsight/web/public/css/graph.css +14 -0
- data/lib/archsight/web/public/css/instance.css +489 -0
- data/lib/archsight/web/views/api_docs.erb +19 -0
- data/lib/archsight/web/views/partials/artifact/_project_estimate.haml +14 -8
- data/lib/archsight/web/views/partials/instance/_analysis_detail.haml +74 -0
- data/lib/archsight/web/views/partials/instance/_analysis_result.haml +64 -0
- data/lib/archsight/web/views/partials/instance/_detail.haml +7 -3
- data/lib/archsight/web/views/partials/instance/_import_detail.haml +87 -0
- data/lib/archsight/web/views/partials/instance/_relations.haml +4 -4
- data/lib/archsight/web/views/partials/layout/_content.haml +4 -0
- data/lib/archsight/web/views/partials/layout/_navigation.haml +6 -5
- metadata +78 -1
data/lib/archsight/import/git_analytics.rb (new file)
@@ -0,0 +1,626 @@
# frozen_string_literal: true

require "time"
require "open3"
require "archsight/import"

# Repository health metrics analyzer (human activity only)
#
# Analyzes git repositories to extract:
# - Commits, contributors, top contributors (full history for team matching)
# - Recent tags (last 2 years)
# - Bus factor risk (low / medium / high / unknown)
# - Activity status (active / bot-only / abandoned)
# - Deployment types, workflow platforms, OCI images
# - Agentic tools configuration
# - README description and documentation links
#
# @example
#   analytics = Archsight::Import::GitAnalytics.new("/path/to/repo")
#   result = analytics.analyze
class Archsight::Import::GitAnalytics
  DEFAULT_SINCE_DAYS = 180
  DEFAULT_HIGH_THRESH = 0.75
  DEFAULT_MED_THRESH = 0.50

  IGNORED_BOTS = [
    /dependabot/i,
    /renovate\[bot\]/i,
    /greenkeeper/i,
    /ci\s+bot/i
  ].freeze

  AGENTIC_FILES = {
    "claude" => %w[claude.md .claude.md docs/claude.md CLAUDE.md],
    "cursor" => %w[.cursorrules .cursor/rules cursor.md],
    "aider" => %w[.aider.conf.yml aider.md docs/aider.md],
    "github-copilot" => %w[.github/copilot-instructions.md],
    "agents" => %w[agents.md .agents.md docs/agents.md]
  }.freeze

  def initialize(repo_path, options = {})
    @repo_path = repo_path
    @since_days = options[:since_days] || DEFAULT_SINCE_DAYS
    @high_thresh = options[:high_thresh] || DEFAULT_HIGH_THRESH
    @med_thresh = options[:med_thresh] || DEFAULT_MED_THRESH
    @since_iso = (Time.now - (@since_days * 86_400)).utc.iso8601
  end

  def analyze
    {
      "commits" => commit_count,
      "commits_per_month" => commits_per_month,
      "contributors" => contributor_count,
      "contributors_6m" => contributors_6m_unique,
      "contributors_per_month" => contributors_per_month,
      "top_contributors" => top_contributors,
      "recent_tags" => recent_tags,
      "activity_status" => activity_status,
      "created_at" => created_at,
      "last_human_commit" => last_human_commit,
      "bus_factor_risk" => bus_factor_risk,
      "agentic_tools" => agentic_tools,
      "deployment_types" => deployment_types,
      "workflow_platforms" => workflow_platforms,
      "workflow_types" => workflow_types,
      "oci_images" => oci_images,
      "description" => description,
      "documentation_links" => documentation_links
    }
  end
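
  # Illustrative shape of the value returned by #analyze (the numbers below
  # are hypothetical, not taken from a real repository):
  #
  #   {
  #     "commits"           => 412,
  #     "commits_per_month" => [3, 7, 0, 12],
  #     "contributors"      => 9,
  #     "activity_status"   => "active",
  #     "bus_factor_risk"   => "medium",
  #     "deployment_types"  => "container,chart",
  #     ...
  #   }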

  private

  # Run a git command inside the repo
  def git(*git_args)
    cmd = ["git", "-C", @repo_path] + git_args
    out, err, status = Open3.capture3(*cmd)
    raise "git failed: #{cmd.join(" ")}\n#{err}" unless status.success?

    out.force_encoding("UTF-8").encode("UTF-8", invalid: :replace, undef: :replace, replace: "?").strip
  end
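
  # Example (hypothetical output): git("rev-parse", "--verify", "main") #=> "9f2c1ab..."
  # A non-zero exit status raises, which is why callers such as
  # find_most_recent_ref and created_at rescue StandardError.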

  # Check if author string matches bot patterns
  def bot?(author_str)
    IGNORED_BOTS.any? { |re| author_str =~ re }
  end

  # Determine the most recent ref (local or remote)
  def most_recent_ref
    @most_recent_ref ||= find_most_recent_ref
  end

  def find_most_recent_ref
    refs = git(
      "for-each-ref",
      "--sort=-committerdate",
      "--format=%(refname:short)",
      "refs/heads/",
      "refs/remotes/"
    ).split("\n")

    ref = refs.find { |r| !r.empty? }
    return ref if ref && !ref.empty?

    %w[main master].each do |candidate|
      return candidate if git("rev-parse", "--verify", candidate)
    rescue StandardError
      nil
    end

    "HEAD"
  rescue StandardError
    "HEAD"
  end

  # Get all commits from full history (raw, including bots)
  def raw_commit_lines
    @raw_commit_lines ||= git(
      "log",
      most_recent_ref,
      "--no-merges",
      "--pretty=format:%H|%an|%ae",
      "--"
    ).split("\n").map { |line| line.split("|", 3) }
  end

  # Get recent commits (within since_days window)
  def recent_commit_lines
    @recent_commit_lines ||= git(
      "log",
      most_recent_ref,
      "--since=#{@since_iso}",
      "--no-merges",
      "--pretty=format:%H|%an|%ae",
      "--"
    ).split("\n").map { |line| line.split("|", 3) }
  end

  # Get human-only commits from full history
  def human_commits
    @human_commits ||= raw_commit_lines.reject { |_, author, _| bot?(author) }
  end

  # Get recent human-only commits for activity status
  def recent_human_commits
    @recent_human_commits ||= recent_commit_lines.reject { |_, author, _| bot?(author) }
  end

  # Get commits from the last 6 months (for bus factor calculation)
  def commits_6m
    @commits_6m ||= begin
      six_months_ago = (Time.now - (6 * 30 * 24 * 60 * 60)).strftime("%Y-%m-%d")
      git(
        "log",
        most_recent_ref,
        "--since=#{six_months_ago}",
        "--no-merges",
        "--pretty=format:%H|%an|%ae",
        "--"
      ).split("\n").map { |line| line.split("|", 3) }
    end
  end

  # Get human-only commits from the last 6 months
  def human_commits_6m
    @human_commits_6m ||= commits_6m.reject { |_, author, _| bot?(author) }
  end

  def commit_count
    human_commits.size
  end

  def last_human_commit
    return nil if human_commits.empty?

    most_recent_hash = human_commits.first.first
    git("show", "-s", "--format=%cI", most_recent_hash)
  end

  def created_at
    return nil if raw_commit_lines.empty?

    oldest_hash = raw_commit_lines.last.first
    git("show", "-s", "--format=%cI", oldest_hash)
  rescue StandardError
    nil
  end

  def commits_per_month
    return [] if raw_commit_lines.empty?

    dates_output = git(
      "log",
      most_recent_ref,
      "--no-merges",
      "--pretty=format:%cI",
      "--"
    )
    return [] if dates_output.empty?

    commit_dates = dates_output.split("\n").filter_map do |d|
      Time.parse(d)
    rescue StandardError
      nil
    end
    return [] if commit_dates.empty?

    counts_by_month = commit_dates.each_with_object(Hash.new(0)) do |date, h|
      key = date.strftime("%Y-%m")
      h[key] += 1
    end

    first_month = commit_dates.min.strftime("%Y-%m")
    last_month = Time.now.strftime("%Y-%m")

    all_months = generate_month_range(first_month, last_month)
    all_months.map { |m| counts_by_month[m] || 0 }
  end

  def generate_month_range(start_month, end_month)
    start_year, start_mon = start_month.split("-").map(&:to_i)
    end_year, end_mon = end_month.split("-").map(&:to_i)

    months = []
    year = start_year
    mon = start_mon
    while year < end_year || (year == end_year && mon <= end_mon)
      months << format("%04d-%02d", year, mon)
      mon += 1
      if mon > 12
        mon = 1
        year += 1
      end
    end
    months
  end
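
  # Example: generate_month_range("2023-11", "2024-02")
  #   #=> ["2023-11", "2023-12", "2024-01", "2024-02"]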

  def contributor_count
    contrib_counter.size
  end

  def contributors_per_month
    return [] if human_commits.empty?

    dates_output = git(
      "log",
      most_recent_ref,
      "--no-merges",
      "--pretty=format:%cI|%an|%ae",
      "--"
    )
    return [] if dates_output.empty?

    commits_with_dates = dates_output.split("\n").filter_map do |line|
      parts = line.split("|", 3)
      next nil if parts.length < 3

      date_str, author_name, author_email = parts
      next nil if bot?(author_name)

      date = begin
        Time.parse(date_str)
      rescue StandardError
        nil
      end
      next nil unless date

      { date: date, author: "#{author_name}|#{author_email}" }
    end

    return [] if commits_with_dates.empty?

    contributors_by_month = commits_with_dates.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |commit, h|
      key = commit[:date].strftime("%Y-%m")
      h[key] << commit[:author]
    end

    first_month = commits_with_dates.map { |c| c[:date] }.min.strftime("%Y-%m")
    last_month = Time.now.strftime("%Y-%m")

    all_months = generate_month_range(first_month, last_month)
    all_months.map { |m| contributors_by_month[m]&.size || 0 }
  end

  def contrib_counter
    @contrib_counter ||= human_commits.each_with_object(Hash.new { |h, k| h[k] = 0 }) do |(_, name, email), h|
      key = [name, email]
      h[key] += 1
    end
  end

  def top_contributors
    contrib_counter
      .sort_by { |_key, cnt| -cnt }
      .first(50)
      .map { |(name, email), cnt| { "name" => name, "email" => email, "commits" => cnt } }
  end

  def recent_tags
    @recent_tags ||= begin
      two_years_ago = (Time.now - (730 * 86_400)).utc.iso8601
      raw_tags = git(
        "for-each-ref",
        "--sort=-creatordate",
        "--format=%(refname:short) %(creatordate:iso8601)",
        "refs/tags"
      )

      raw_tags.each_line
              .map { |l| l.split(" ", 2) }
              .select { |_, date| date && date >= two_years_ago }
              .map { |name, date| { "name" => name, "date" => date.chomp } }
              .first(10)
    end
  end

  def activity_status
    if recent_human_commits.empty? && recent_commit_lines.empty?
      "abandoned"
    elsif recent_human_commits.empty?
      "bot-only"
    else
      "active"
    end
  end
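
  # Classification examples:
  #   no commits at all inside the since_days window        -> "abandoned"
  #   only bot commits (e.g. dependabot) inside the window  -> "bot-only"
  #   at least one human commit inside the window           -> "active"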

  def contributors_6m_unique
    @contributors_6m_unique ||= calculate_contributors_6m_unique
  end

  def calculate_contributors_6m_unique
    return 0 if human_commits_6m.empty?

    human_commits_6m.map { |_, author, _| author }.uniq.size
  end

  def bus_factor_risk
    return "unknown" if human_commits_6m.empty?

    commits_by_author = Hash.new(0)
    human_commits_6m.each { |_, author, _| commits_by_author[author] += 1 }

    total_6m = commits_by_author.values.sum
    top_6m = commits_by_author.values.max

    share = total_6m.zero? ? 0.0 : top_6m.to_f / total_6m

    if share > @high_thresh
      "high"
    elsif share > @med_thresh
      "medium"
    else
      "low"
    end
  end
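
  # Worked examples with the default thresholds (0.75 / 0.50) and 30 human
  # commits in the last 6 months:
  #   top author has 24 commits -> share 0.8 -> "high"
  #   top author has 18 commits -> share 0.6 -> "medium"
  #   top author has 12 commits -> share 0.4 -> "low"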

  def agentic_tools
    tools = []

    AGENTIC_FILES.each do |tool, files|
      files.each do |file|
        if File.exist?(File.join(@repo_path, file))
          tools << tool
          break
        end
      end
    end

    tools.uniq!
    tools.empty? ? "none" : tools.join(",")
  end
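
  # Example (hypothetical repo): CLAUDE.md and .cursorrules present #=> "claude,cursor"
  # No marker files present #=> "none"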

  def deployment_types
    types = []
    types << "container" if File.exist?(File.join(@repo_path, "Dockerfile"))
    types << "chart" if Dir.exist?(File.join(@repo_path, "charts")) || Dir.exist?(File.join(@repo_path, "helm"))
    types << "debian" if File.exist?(File.join(@repo_path, "debian/control"))
    types << "rpm" if File.exist?(File.join(@repo_path, ".spec")) || Dir.glob(File.join(@repo_path, "*.spec")).any?

    makefile_path = File.join(@repo_path, "Makefile")
    if File.exist?(makefile_path)
      makefile_content = File.read(makefile_path)
      types << "binary" if makefile_content.match?(/\bbuild\b/i)
    end

    types << "none" if types.empty?
    types.join(",")
  end

  def oci_images
    @oci_images ||= begin
      images = []

      # Search GitHub Actions workflows
      workflows_dir = File.join(@repo_path, ".github/workflows")
      if Dir.exist?(workflows_dir)
        Dir.glob(File.join(workflows_dir, "*.{yml,yaml}")).each do |workflow_file|
          images.concat(extract_oci_images_from_file(workflow_file))
        end
      end

      # Search GitLab CI
      gitlab_ci = File.join(@repo_path, ".gitlab-ci.yml")
      images.concat(extract_oci_images_from_file(gitlab_ci)) if File.exist?(gitlab_ci)

      # Infer from Dockerfile if no explicit references found
      if deployment_types.include?("container") && images.empty?
        repo_name = File.basename(@repo_path)
        images << "ghcr.io/ionos-cloud/#{repo_name}" if @repo_path.include?("ionos-cloud") || @repo_path.include?("github.com")
      end

      images.uniq
    end
  end

  def extract_oci_images_from_file(file_path)
    return [] unless File.exist?(file_path)

    images = []
    content = File.read(file_path)

    # Pattern 1: images: ghcr.io/ionos-cloud/repo-name or harbor...
    content.scan(/images:\s*[|\n]\s*([^\s]+(?:ghcr\.io|harbor)[^\s]+)/m).flatten.each do |img|
      img.split("\n").each do |line|
        line = line.strip
        next if line.empty? || line.start_with?("type=")

        images << line if line.match?(/ghcr\.io|harbor/)
      end
    end

    # Pattern 2: Direct image references
    content.scan(%r{(?:ghcr\.io|harbor[^\s]*)/([^\s:]+)}).flatten.each do |path|
      images << "ghcr.io/#{path}" unless images.any? { |img| img.include?(path) }
    end

    images
  end

  def workflow_platforms
    platforms = []
    platforms << "github-actions" if Dir.exist?(File.join(@repo_path, ".github/workflows"))
    platforms << "gitlab-ci" if File.exist?(File.join(@repo_path, ".gitlab-ci.yml"))
    platforms << "makefile" if File.exist?(File.join(@repo_path, "Makefile"))
    platforms << "none" if platforms.empty?
    platforms.join(",")
  end

  def workflow_types
    types = []
    workflow_files = collect_workflow_files

    workflow_files.each do |file|
      next unless File.exist?(file)

      content = File.read(file)
      content_lower = content.downcase

      types << "build" if content_lower.match?(/\b(build|compile|docker build|go build|npm run build|maven|gradle)\b/)
      types << "test" if content_lower.match?(/\btest[^-]|\bmake test\b/)
      types << "unit-test" if content_lower.match?(/\b(unit[- ]test|unittest|test.*unit|jest|pytest|rspec|go test.*-short)\b/)
      types << "integration-test" if content_lower.match?(/\b(integration[- ]test|test.*integration|e2e|end-to-end)\b/)
      types << "smoke-test" if content_lower.match?(/\b(smoke[- ]test|test.*smoke)\b/)
      types << "deploy" if content_lower.match?(/\b(deploy|push|publish|release|kubectl apply|helm (install|upgrade))\b/)
      types << "lint" if content_lower.match?(/\b(lint|eslint|rubocop|pylint|golangci-lint|flake8|checkstyle)\b/)
      types << "security-scan" if content_lower.match?(/\b(trivy|snyk|sonarqube|codeql|security[- ]scan|vulnerability|scan.*image|bundler-audit|brakeman|ruby_audit|npm audit|yarn audit|safety check|bandit|gosec)\b/)
      types << "dependency-update" if content_lower.match?(/\b(dependabot|renovate|dependency.*update|update.*depend)\b/)
      types << "ticket-creation" if content_lower.match?(/\b(jira|tosm|create.*ticket|create.*issue|atlassian)\b/)
    end

    # Check for dependency update config files
    if File.exist?(File.join(@repo_path, ".github/dependabot.yml")) ||
       File.exist?(File.join(@repo_path, ".github/dependabot.yaml")) ||
       File.exist?(File.join(@repo_path, "renovate.json")) ||
       File.exist?(File.join(@repo_path, ".renovaterc"))
      types << "dependency-update"
    end

    types.uniq!
    types << "none" if types.empty?
    types.join(",")
  end

  def collect_workflow_files
    files = []
    files += Dir.glob(File.join(@repo_path, ".github/workflows/*.{yml,yaml}"))
    files << File.join(@repo_path, ".gitlab-ci.yml") if File.exist?(File.join(@repo_path, ".gitlab-ci.yml"))
    files << File.join(@repo_path, "Makefile") if File.exist?(File.join(@repo_path, "Makefile"))
    files
  end

  def description
    @description ||= extract_description
  end

  def documentation_links
    @documentation_links ||= extract_links
  end

  def extract_links
    readme_files = Dir.glob(File.join(@repo_path, "README*"), File::FNM_CASEFOLD)
    readme_file = readme_files.first
    return [] unless readme_file && File.exist?(readme_file)

    content = read_file_with_encoding(readme_file)
    return [] unless content

    links = []

    # Match markdown links: [text](url) - only http/https
    content.scan(/\[([^\]]+)\]\(([^)]+)\)/).each do |text, url|
      next unless url.match?(%r{^https?://})
      next if text.match?(/^!/) # Skip images

      url = url.strip.split(/\s+/).first
      clean_text = text.strip.gsub(/[*_`~]/, "")
      links << { "text" => clean_text, "url" => url }
    end

    # Match bare URLs (http/https)
    content.scan(%r{(?<![(\[])(https?://[^\s<>)\]]+)}).flatten.each do |url|
      next if links.any? { |link| link["url"] == url }

      domain = begin
        url.match(%r{https?://([^/]+)})[1]
      rescue StandardError
        url
      end
      links << { "text" => domain, "url" => url }
    end

    links.uniq { |link| link["url"] }
  end

  def extract_description
    readme_files = Dir.glob(File.join(@repo_path, "README*"), File::FNM_CASEFOLD)
    readme_file = readme_files.first
    return nil unless readme_file && File.exist?(readme_file)

    content = read_file_with_encoding(readme_file)
    return nil unless content

    description_lines = extract_description_lines(content)
    return nil if description_lines.empty?

    desc = description_lines.join("\n").strip
    truncate_at_sentence_boundary(desc)
  end

  def read_file_with_encoding(file_path)
    raw = File.read(file_path, mode: "rb")

    if raw.start_with?("\xFF\xFE".b)
      raw.force_encoding("UTF-16LE").encode("UTF-8")
    elsif raw.start_with?("\xFE\xFF".b)
      raw.force_encoding("UTF-16BE").encode("UTF-8")
    elsif raw.start_with?("\xEF\xBB\xBF".b)
      raw.force_encoding("UTF-8")[3..]
    else
      raw.force_encoding("UTF-8").encode("UTF-8", invalid: :replace, undef: :replace, replace: "")
    end
  rescue StandardError
    nil
  end

  def extract_description_lines(content)
    content = content.sub(/^\uFEFF/, "")

    lines = content.lines
    description_lines = []
    found_first_paragraph = false
    blank_line_count = 0

    lines.each do |line|
      line = line.encode("UTF-8", invalid: :replace, undef: :replace, replace: "")
      stripped = line.strip

      unless found_first_paragraph
        next if stripped.empty?
        next if stripped.match?(/^#\s+/)
        next if stripped.match?(/^\[!\[|^!\[/)
        next if stripped.match?(/^\[.*\]\(.*\)$/) && !stripped.include?(" ")

        found_first_paragraph = true
      end

      if stripped.empty?
        blank_line_count += 1
        break if blank_line_count >= 2 && description_lines.any?

        description_lines << "" if description_lines.any?
        next
      else
        blank_line_count = 0
      end

      break if stripped.match?(/^##\s+/)

      description_lines << stripped
      break if description_lines.join("\n").length > 1500
    end

    description_lines
  end

  def truncate_at_sentence_boundary(description)
    return description if description.length <= 600

    paragraphs = description.split(/\n\n+/)
    result_paragraphs = []
    current_length = 0

    paragraphs.each do |para|
      candidate_length = current_length + (result_paragraphs.empty? ? 0 : 2) + para.length
      break if current_length >= 600 && candidate_length > 1200

      result_paragraphs << para
      current_length = candidate_length
      break if current_length > 1200
    end

    return description if result_paragraphs.empty?

    result_paragraphs.join("\n\n")
  end
end
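
Usage sketch based on the @example in the file header above. The repository path, the since_days override, and the way the result is read are illustrative; the option keys come from GitAnalytics#initialize and the result keys from #analyze, and it is assumed that requiring archsight/import makes the class available:

    require "archsight/import"

    analytics = Archsight::Import::GitAnalytics.new("/path/to/repo", since_days: 90)
    result = analytics.analyze

    result["activity_status"]   # "active", "bot-only", or "abandoned"
    result["bus_factor_risk"]   # "low", "medium", "high", or "unknown"
    result["deployment_types"]  # e.g. "container,chart"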