archsight 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +26 -5
  3. data/lib/archsight/analysis/executor.rb +112 -0
  4. data/lib/archsight/analysis/result.rb +174 -0
  5. data/lib/archsight/analysis/sandbox.rb +319 -0
  6. data/lib/archsight/analysis.rb +11 -0
  7. data/lib/archsight/annotations/architecture_annotations.rb +2 -2
  8. data/lib/archsight/cli.rb +163 -0
  9. data/lib/archsight/database.rb +6 -2
  10. data/lib/archsight/helpers/analysis_renderer.rb +83 -0
  11. data/lib/archsight/helpers/formatting.rb +95 -0
  12. data/lib/archsight/helpers.rb +20 -4
  13. data/lib/archsight/import/concurrent_progress.rb +341 -0
  14. data/lib/archsight/import/executor.rb +466 -0
  15. data/lib/archsight/import/git_analytics.rb +626 -0
  16. data/lib/archsight/import/handler.rb +263 -0
  17. data/lib/archsight/import/handlers/github.rb +161 -0
  18. data/lib/archsight/import/handlers/gitlab.rb +202 -0
  19. data/lib/archsight/import/handlers/jira_base.rb +189 -0
  20. data/lib/archsight/import/handlers/jira_discover.rb +161 -0
  21. data/lib/archsight/import/handlers/jira_metrics.rb +179 -0
  22. data/lib/archsight/import/handlers/openapi_schema_parser.rb +279 -0
  23. data/lib/archsight/import/handlers/repository.rb +439 -0
  24. data/lib/archsight/import/handlers/rest_api.rb +293 -0
  25. data/lib/archsight/import/handlers/rest_api_index.rb +183 -0
  26. data/lib/archsight/import/progress.rb +91 -0
  27. data/lib/archsight/import/registry.rb +54 -0
  28. data/lib/archsight/import/shared_file_writer.rb +67 -0
  29. data/lib/archsight/import/team_matcher.rb +195 -0
  30. data/lib/archsight/import.rb +14 -0
  31. data/lib/archsight/resources/analysis.rb +91 -0
  32. data/lib/archsight/resources/application_component.rb +2 -2
  33. data/lib/archsight/resources/application_service.rb +12 -12
  34. data/lib/archsight/resources/business_product.rb +12 -12
  35. data/lib/archsight/resources/data_object.rb +1 -1
  36. data/lib/archsight/resources/import.rb +79 -0
  37. data/lib/archsight/resources/technology_artifact.rb +23 -2
  38. data/lib/archsight/version.rb +1 -1
  39. data/lib/archsight/web/api/docs.rb +17 -0
  40. data/lib/archsight/web/api/json_helpers.rb +164 -0
  41. data/lib/archsight/web/api/openapi/spec.yaml +500 -0
  42. data/lib/archsight/web/api/routes.rb +101 -0
  43. data/lib/archsight/web/application.rb +66 -43
  44. data/lib/archsight/web/doc/import.md +458 -0
  45. data/lib/archsight/web/doc/index.md.erb +1 -0
  46. data/lib/archsight/web/public/css/artifact.css +10 -0
  47. data/lib/archsight/web/public/css/graph.css +14 -0
  48. data/lib/archsight/web/public/css/instance.css +489 -0
  49. data/lib/archsight/web/views/api_docs.erb +19 -0
  50. data/lib/archsight/web/views/partials/artifact/_project_estimate.haml +14 -8
  51. data/lib/archsight/web/views/partials/instance/_analysis_detail.haml +74 -0
  52. data/lib/archsight/web/views/partials/instance/_analysis_result.haml +64 -0
  53. data/lib/archsight/web/views/partials/instance/_detail.haml +7 -3
  54. data/lib/archsight/web/views/partials/instance/_import_detail.haml +87 -0
  55. data/lib/archsight/web/views/partials/instance/_relations.haml +4 -4
  56. data/lib/archsight/web/views/partials/layout/_content.haml +4 -0
  57. data/lib/archsight/web/views/partials/layout/_navigation.haml +6 -5
  58. metadata +78 -1
@@ -0,0 +1,439 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open3"
4
+ require "json"
5
+ require "fileutils"
6
+ require_relative "../handler"
7
+ require_relative "../registry"
8
+ require_relative "../git_analytics"
9
+ require_relative "../team_matcher"
10
+
11
+ # Repository handler - clones/syncs and analyzes a git repository, generates a TechnologyArtifact
12
+ #
13
+ # Configuration:
14
+ # import/config/path - Path where the git repository should be cloned
15
+ # import/config/gitUrl - Git URL to clone from (if not already cloned)
16
+ # import/config/archived - Optional "true" if repository is archived
17
+ # import/config/visibility - Optional visibility (internal, public, open-source)
18
+ # import/config/sccPath - Optional path to scc binary (default: scc)
19
+ # import/config/fallbackTeam - Optional team name when no contributor match found
20
+ # import/config/botTeam - Optional team name for bot-only repositories
21
+ class Archsight::Import::Handlers::Repository < Archsight::Import::Handler
22
# Entry point of the handler.
#
# Reads the "path" and "gitUrl" config values, syncs the repository when a
# gitUrl is configured, runs scc and git analytics, matches teams and writes
# the resulting TechnologyArtifact. A minimal artifact is written instead when
# the repository is inaccessible, was flagged by sync_repository (@skip_analysis)
# or scc reports an estimated cost of zero.
#
# @raise [RuntimeError] when "path" is not configured, the directory does not
#   exist, or it does not contain a .git directory
def execute
  @path = config("path")
  @git_url = config("gitUrl")
  raise "Missing required config: path" unless @path

  # Only touch the remote when a gitUrl is configured
  if @git_url
    begin
      sync_repository
      if @skip_analysis
        write_generates_meta
        return
      end
    rescue StandardError => e
      # Anything that is not an access problem is propagated unchanged
      raise unless access_denied_error?(e.message)

      progress.update("Access denied - creating minimal artifact")
      write_minimal_artifact(
        status: "inaccessible",
        reason: "Repository not accessible",
        error: e.message,
        visibility: "private"
      )
      write_generates_meta
      return
    end
  end

  raise "Directory not found: #{@path}" unless File.directory?(@path)

  dot_git = File.join(@path, ".git")
  raise "Not a git repository: #{@path}" unless File.directory?(dot_git)

  # A reported cost of exactly zero means scc found nothing to analyze
  progress.update("Analyzing code")
  code_stats = run_scc(@path)
  cost = code_stats["estimatedCost"]
  nothing_to_analyze = !cost.nil? && cost.to_f.zero?
  if nothing_to_analyze
    progress.update("No analyzable code - creating minimal artifact")
    write_minimal_artifact(status: "no-code", reason: "No analyzable source code found")
    write_generates_meta
    return
  end

  progress.update("Analyzing git history")
  history = run_git_analytics(@path)

  progress.update("Matching teams")
  teams = match_teams(history["top_contributors"], history["activity_status"])

  progress.update("Generating resource")
  artifact = build_technology_artifact(@path, code_stats, history, teams)

  # The self-marker document is appended so the output can be cached
  write_yaml(YAML.dump(artifact) + YAML.dump(self_marker))

  write_generates_meta
end
87
+
88
# Bring the working copy at @path up to date: update it when a .git directory
# already exists, clone it otherwise. When the repository turns out to have no
# commits, a minimal "empty" artifact is written and @skip_analysis is set.
def sync_repository
  already_cloned = File.directory?(File.join(@path, ".git"))

  if already_cloned
    progress.update("Updating repository")
    update_repository
  else
    progress.update("Cloning repository")
    clone_repository
  end

  # A repository without any commit cannot be analyzed
  if empty_repository?
    progress.update("Empty repository - creating minimal artifact")
    write_minimal_artifact(status: "empty", reason: "Repository has no commits")
    @skip_analysis = true
  end
end
109
+
110
# Clone @git_url into @path, creating the parent directory first.
#
# The "--" separator ends git's option parsing so that a configured URL or path
# beginning with "-" is treated as a positional argument, never as an option.
#
# @raise [RuntimeError] when the git command fails (raised by run_git)
def clone_repository
  FileUtils.mkdir_p(File.dirname(@path))
  run_git(["git", "clone", "--quiet", "--", @git_url, @path], Dir.pwd)
end
114
+
115
# Fetch from the remote and fast-forward the working copy at @path.
#
# Only a failing fast-forward merge triggers the hard reset to FETCH_HEAD.
# Failures of the fetch or of the rev-parse calls propagate to the caller, so
# an unreachable remote is reported as such instead of being logged as a merge
# failure followed by a reset to a stale FETCH_HEAD.
#
# @raise [RuntimeError] when fetch, rev-parse or the fallback reset fails
def update_repository
  run_git(%w[git fetch --quiet], @path)
  return if empty_repository? # Skip merge for empty repos

  # Check if update is needed
  current_head = run_git(%w[git rev-parse HEAD], @path).strip
  fetch_head = run_git(%w[git rev-parse FETCH_HEAD], @path).strip
  return if current_head == fetch_head # Already up-to-date

  begin
    run_git(%w[git merge --ff-only FETCH_HEAD], @path)
  rescue StandardError => e
    # If merge fails (diverged history), reset to remote state
    progress.warn("Merge failed: #{e.message}, resetting to remote")
    run_git(%w[git reset --hard FETCH_HEAD], @path)
  end
end
130
+
131
# Tell whether the repository at @path has no commits yet.
#
# @return [Boolean] true when `git rev-parse HEAD` exits unsuccessfully
def empty_repository?
  # HEAD cannot be resolved in a repository without commits
  captured = Open3.capture3("git", "rev-parse", "HEAD", chdir: @path)
  exit_status = captured.last
  !exit_status.success?
end
136
+
137
# Execute a command without going through a shell (array form), which rules
# out shell injection via the arguments.
# @param command [Array<String>] Executable followed by its arguments
# @param dir [String] Directory the command is run in
# @return [String] Captured standard output
# @raise [RuntimeError] when the command exits unsuccessfully; the message
#   carries the sanitized standard error
def run_git(command, dir)
  stdout, stderr, status = Open3.capture3(*command, chdir: dir)
  return stdout if status.success?

  raise "Git command failed: #{sanitize_error(stderr)}"
end
147
+
148
# Reduce a (possibly multi-line) error message to one short, printable line so
# it cannot break the TTY progress display.
#
# @param message [String, nil] Raw error text, e.g. the stderr of a git command
# @return [String] First meaningful line with ANSI escape sequences removed,
#   truncated to at most 100 characters; "" for nil or empty input
def sanitize_error(message)
  return "" if message.nil? || message.empty?

  # Replace invalid bytes first so the regexp cannot raise on broken encodings,
  # then drop ANSI CSI sequences (colors, cursor movement) and stray ESC bytes.
  printable = message.scrub("?").gsub(/\e\[[\d;?]*[A-Za-z]/, "").delete("\e")

  stripped_lines = printable.lines.map(&:strip)

  # Prefer the first line that is neither blank nor server chatter ("remote: ...");
  # fall back to the very first line when every line was filtered out.
  first_line = stripped_lines.find { |line| !line.empty? && !line.start_with?("remote:") }
  first_line ||= stripped_lines.first || ""

  # Truncate if too long
  first_line.length > 100 ? "#{first_line[0, 97]}..." : first_line
end
159
+
160
# Decide whether an error message looks like the remote refused access
# (or does not exist).
#
# @param message [String, nil] Error text to inspect
# @return [Boolean] true when any known access-related phrase occurs
#   (case-insensitive); false for nil
def access_denied_error?(message)
  return false if message.nil?

  [
    /could not read from remote repository/i,
    /permission denied/i,
    /access denied/i,
    /authentication failed/i,
    /repository not found/i,
    /fatal: '.*' does not appear to be a git repository/i
  ].any? { |pattern| pattern.match?(message) }
end
174
+
175
# Write a minimal TechnologyArtifact for repositories that can't be fully analyzed.
#
# The artifact name is derived from the configured git URL, or from @path when
# no gitUrl is configured (path-only imports reach this method through the
# "no-code" branch). The "repository/git" annotation is only written when a
# git URL is known.
#
# @param status [String] Activity status (inaccessible, empty, no-code)
# @param reason [String] Human-readable reason
# @param error [String, nil] Optional error message (stored sanitized)
# @param visibility [String, nil] Repository visibility (default: the "visibility"
#   config value, or "internal")
def write_minimal_artifact(status:, reason:, error: nil, visibility: nil)
  vis = visibility || config("visibility", default: "internal")

  annotations = { "artifact/type" => "repo" }
  # Without a configured gitUrl there is no remote to record
  annotations["repository/git"] = @git_url if @git_url
  annotations["repository/visibility"] = vis
  annotations["activity/status"] = status
  annotations["activity/reason"] = reason
  annotations["generated/script"] = import_resource.name
  annotations["generated/at"] = Time.now.utc.iso8601

  annotations["repository/accessible"] = "false" if status == "inaccessible"
  annotations["repository/error"] = sanitize_error(error) if error

  resource = resource_yaml(
    kind: "TechnologyArtifact",
    name: repository_name(@git_url || @path),
    annotations: annotations,
    spec: {}
  )

  # Write output with self-marker for caching
  yaml_content = YAML.dump(resource) + YAML.dump(self_marker)
  write_yaml(yaml_content)
end
208
+
209
+ private
210
+
211
# Run scc on a directory and return its parsed JSON report.
#
# The binary is taken from the "sccPath" config value and defaults to "scc"
# when that value is missing or blank.
#
# @param path [String] Directory to analyze
# @return [Hash] Parsed scc "json2" output, or empty_scc_result when scc
#   printed nothing
# @raise [RuntimeError] when scc exits unsuccessfully or prints invalid JSON
def run_scc(path)
  scc_binary = config("sccPath", default: "scc")
  scc_binary = "scc" if scc_binary.nil? || scc_binary.to_s.strip.empty?
  cmd = [scc_binary, "-f", "json2", "--sort", "name", path]

  out, err, status = Open3.capture3(*cmd)
  raise "scc failed: #{cmd.join(" ")}\n#{err}" unless status.success?

  return empty_scc_result if out.strip.empty?

  JSON.parse(out)
rescue JSON::ParserError => e
  raise "Failed to parse scc output for #{path}: #{e.message}"
end
223
+
224
# Placeholder scc report used when scc produced no output: no languages and
# zero for every estimate.
#
# @return [Hash] Report with an empty "languageSummary" and zeroed estimates
def empty_scc_result
  report = { "languageSummary" => [] }
  %w[estimatedCost estimatedPeople estimatedScheduleMonths].each { |metric| report[metric] = 0 }
  report
end
232
+
233
# Analyze the git history of a repository on a best-effort basis.
#
# @param path [String] Repository directory
# @return [Hash] Result of GitAnalytics#analyze, or empty_git_analytics_result
#   (after logging a warning) when the analysis raises
def run_git_analytics(path)
  analyzer = Archsight::Import::GitAnalytics.new(path)
  analyzer.analyze
rescue StandardError => e
  # A failing history analysis must not abort the import
  progress.warn("Git analytics failed: #{e.message}")
  empty_git_analytics_result
end
239
+
240
# Placeholder git analytics result used when the history analysis failed:
# "unknown" statuses, empty series, zero counts and "none" for detected tooling.
#
# @return [Hash] Neutral analytics result keyed like a real one
def empty_git_analytics_result
  result = {}
  %w[activity_status bus_factor_risk].each { |field| result[field] = "unknown" }
  %w[commits_per_month contributors_per_month].each { |field| result[field] = [] }
  %w[contributors_6m contributors].each { |field| result[field] = 0 }
  result["top_contributors"] = []
  %w[deployment_types workflow_platforms workflow_types agentic_tools].each { |field| result[field] = "none" }
  result
end
255
+
256
# Map the top contributors of a repository to teams.
#
# When the matcher finds no maintainer, a configured fallback is used:
# "botTeam" (then "fallbackTeam") for bot-only repositories, "fallbackTeam"
# otherwise.
#
# @param top_contributors [Array, nil] Contributors passed to TeamMatcher#analyze
# @param activity_status [String] Activity status from the git analytics
# @return [Hash, nil] Matcher result (read here via :maintainer), or nil when
#   there is no database or no contributors
def match_teams(top_contributors, activity_status)
  return nil if !database || !top_contributors&.any?

  result = Archsight::Import::TeamMatcher.new(database).analyze(top_contributors)
  return result unless result[:maintainer].nil?

  # No maintainer matched: apply the configured fallbacks
  bot_team = activity_status == "bot-only" ? config("botTeam") : nil
  result[:maintainer] = bot_team || config("fallbackTeam")
  result
end
274
+
275
# Assemble the TechnologyArtifact resource for an analyzed repository.
#
# @param path [String] Repository directory
# @param scc_data [Hash] Parsed scc report
# @param git_data [Hash] Git analytics result
# @param team_result [Hash, nil] Team matching result (:maintainer, :contributors)
# @return [Object] Whatever resource_yaml builds for the assembled
#   kind/name/annotations/spec
def build_technology_artifact(path, scc_data, git_data, team_result = nil)
  # Remote URL as recorded in the repository's own git config
  remote_url = extract_git_url(path)

  annotations = { "artifact/type" => "repo" }
  annotations["repository/git"] = remote_url if remote_url
  annotations["repository/visibility"] = config("visibility", default: determine_visibility(remote_url))

  # Metrics: code size, git activity, deployment/workflow information
  annotations.update(build_scc_annotations(scc_data))
  annotations.update(build_activity_annotations(git_data))
  annotations.update(build_deployment_annotations(git_data))

  # Provenance of the generated resource
  annotations["generated/script"] = import_resource.name
  annotations["generated/at"] = Time.now.utc.iso8601

  spec = {}

  # Git provider: any URL not mentioning "github" is attributed to Gitlab
  if remote_url
    provider = "Git:Gitlab"
    provider = "Git:Github" if remote_url.include?("github")
    spec["suppliedBy"] = { "technologyComponents" => [provider] }
  end

  # Team relations from contributor matching
  maintainer = team_result && team_result[:maintainer]
  contributing_teams = team_result && team_result[:contributors]
  spec["maintainedBy"] = { "businessActors" => [maintainer] } if maintainer
  spec["contributedBy"] = { "businessActors" => contributing_teams } if contributing_teams&.any?

  resource_yaml(
    kind: "TechnologyArtifact",
    name: repository_name(remote_url || path),
    annotations: annotations,
    spec: spec
  )
end
324
+
325
# Read the remote URL of a repository from its .git/config file.
#
# Only `url = ...` entries inside `[remote "..."]` sections are considered, so
# `[url "..."]` rewrite sections, `pushurl` entries and comments are ignored.
# Everything after the first "=" is the value, which keeps URLs that contain
# "=" intact. The "origin" remote wins; otherwise the first remote URL found
# is returned.
#
# @param path [String] Repository directory
# @return [String, nil] Remote URL, or nil when there is no config file or no
#   remote URL in it
def extract_git_url(path)
  config_path = File.join(path, ".git", "config")
  return nil unless File.exist?(config_path)

  current_section = nil
  first_remote_url = nil

  File.foreach(config_path) do |raw_line|
    line = raw_line.strip
    next if line.empty? || line.start_with?("#", ";")

    if line.start_with?("[")
      current_section = line
      next
    end

    next unless current_section&.match?(/\A\[remote\s+"/)

    url_entry = line.match(/\Aurl\s*=\s*(.+)\z/)
    next unless url_entry

    url = url_entry[1].strip
    return url if current_section.match?(/\A\[remote\s+"origin"\s*\]/)

    first_remote_url ||= url
  end

  first_remote_url
end
335
+
336
# Derive the resource name ("Repo:...") from a git URL or a local path.
#
# Values containing ":" are treated as git URLs: the part after the last ":"
# is taken, a literal ".git" suffix is removed and "/" becomes ":". The suffix
# match is anchored and escapes the dot, so names that merely end in "git"
# (e.g. "legit") are left alone. Anything else is treated as a path and named
# after its last component.
#
# @param git_url_or_path [String] Git URL (e.g. "git@host:org/repo.git") or path
# @return [String] Resource name such as "Repo:org:repo"
def repository_name(git_url_or_path)
  # Path format - use directory name
  return "Repo:#{File.basename(git_url_or_path)}" unless git_url_or_path.include?(":")

  # Git URL format
  slug = git_url_or_path.split(":").last.sub(/\.git\z/, "").tr("/", ":")
  "Repo:#{slug}"
end
346
+
347
# Default visibility for a repository, used when no "visibility" config value
# is given. Every branch currently yields "internal".
#
# @param git_url [String, nil] Remote URL of the repository
# @return [String] Always "internal"
def determine_visibility(git_url)
  fallback = "internal"
  return fallback if !git_url || !git_url.include?("github")

  # GitHub remotes get the same default; the config value can override it
  fallback
end
354
+
355
# Turn an scc report into "scc/..." annotations: the language list, the three
# estimates formatted with two decimals, and the lines of code per language.
#
# @param scc_data [Hash] Parsed scc report ("languageSummary", "estimated*")
# @return [Hash{String => String}] Annotations in insertion order
def build_scc_annotations(scc_data)
  summary = scc_data["languageSummary"] || []
  result = {}

  language_names = summary.map { |entry| entry["Name"] }
  result["scc/languages"] = language_names.join(",") unless language_names.empty?

  # Missing estimates become "0.00" because nil.to_f is 0.0
  %w[estimatedCost estimatedScheduleMonths estimatedPeople].each do |metric|
    result["scc/#{metric}"] = format("%.2f", scc_data[metric].to_f)
  end

  # Per-language LOC
  summary.each_with_object(result) do |entry, acc|
    acc["scc/language/#{entry["Name"]}/loc"] = entry["Code"].to_s
  end
end
372
+
373
# Turn the git analytics result into activity annotations.
#
# The status is "archived" when the "archived" config value is the string
# "true"; otherwise it is the analyzed status, or "unknown" when absent.
#
# @param git_data [Hash] Git analytics result
# @return [Hash{String => String}] Annotations in insertion order
def build_activity_annotations(git_data)
  is_archived = config("archived") == "true"
  status = is_archived ? "archived" : (git_data["activity_status"] || "unknown")
  result = { "activity/status" => status }

  # Monthly series are joined into comma-separated lists
  monthly_commits = git_data["commits_per_month"]
  result["activity/commits"] = monthly_commits.join(",") if monthly_commits&.any?

  monthly_contributors = git_data["contributors_per_month"]
  result["activity/contributors"] = monthly_contributors.join(",") if monthly_contributors&.any?

  # Contributor counts are stored as strings
  {
    "activity/contributors/6m" => "contributors_6m",
    "activity/contributors/total" => "contributors"
  }.each do |annotation, field|
    value = git_data[field]
    result[annotation] = value.to_s if value
  end

  # Health metrics and timestamps are copied as they are
  {
    "activity/busFactor" => "bus_factor_risk",
    "agentic/tools" => "agentic_tools",
    "activity/createdAt" => "created_at",
    "activity/lastHumanCommit" => "last_human_commit"
  }.each do |annotation, field|
    value = git_data[field]
    result[annotation] = value if value
  end

  # Recent tags (for release info)
  tags = git_data["recent_tags"]
  result["repository/recentTags"] = tags.map { |tag| tag["name"] }.join(",") if tags&.any?

  result
end
405
+
406
# Turn the git analytics result into deployment, workflow, description and
# documentation-link annotations.
#
# Link annotations are keyed "link/<text>", or by the URL without its scheme
# and with "/" replaced by "-" when the link has no text. A key that is
# already taken gets the first free numeric suffix starting at 2.
#
# @param git_data [Hash] Git analytics result
# @return [Hash] Annotations in insertion order
def build_deployment_annotations(git_data)
  result = {}

  {
    "repository/artifacts" => "deployment_types",
    "workflow/platforms" => "workflow_platforms",
    "workflow/types" => "workflow_types"
  }.each do |annotation, field|
    result[annotation] = git_data[field] if git_data[field]
  end

  images = git_data["oci_images"]
  result["deployment/images"] = images.join(",") if images&.any?

  description = git_data["description"]
  result["architecture/description"] = description if description && !description.empty?

  # Documentation links (handle potential key collisions)
  (git_data["documentation_links"] || []).each do |link|
    label = link["text"]
    has_label = label && !label.empty?
    label = link["url"].sub(%r{^https?://}, "").gsub("/", "-") unless has_label

    key = "link/#{label}"
    if result.key?(key)
      # First suffix (2, 3, ...) that is not in use yet
      suffix = 2.step.find { |n| !result.key?("link/#{label}-#{n}") }
      key = "link/#{label}-#{suffix}"
    end
    result[key] = link["url"]
  end

  result
end
437
+ end
438
+
439
+ Archsight::Import::Registry.register("repository", Archsight::Import::Handlers::Repository)