aidp 0.7.0 → 0.8.1

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.
Files changed (119)
  1. checksums.yaml +4 -4
  2. data/README.md +60 -214
  3. data/bin/aidp +1 -1
  4. data/lib/aidp/analysis/kb_inspector.rb +38 -23
  5. data/lib/aidp/analysis/seams.rb +2 -31
  6. data/lib/aidp/analysis/tree_sitter_grammar_loader.rb +1 -13
  7. data/lib/aidp/analysis/tree_sitter_scan.rb +3 -20
  8. data/lib/aidp/analyze/error_handler.rb +2 -75
  9. data/lib/aidp/analyze/json_file_storage.rb +292 -0
  10. data/lib/aidp/analyze/progress.rb +12 -0
  11. data/lib/aidp/analyze/progress_visualizer.rb +12 -17
  12. data/lib/aidp/analyze/ruby_maat_integration.rb +13 -31
  13. data/lib/aidp/analyze/runner.rb +256 -87
  14. data/lib/aidp/cli/jobs_command.rb +100 -432
  15. data/lib/aidp/cli.rb +309 -239
  16. data/lib/aidp/config.rb +298 -10
  17. data/lib/aidp/debug_logger.rb +195 -0
  18. data/lib/aidp/debug_mixin.rb +187 -0
  19. data/lib/aidp/execute/progress.rb +9 -0
  20. data/lib/aidp/execute/runner.rb +221 -40
  21. data/lib/aidp/execute/steps.rb +17 -7
  22. data/lib/aidp/execute/workflow_selector.rb +211 -0
  23. data/lib/aidp/harness/completion_checker.rb +268 -0
  24. data/lib/aidp/harness/condition_detector.rb +1526 -0
  25. data/lib/aidp/harness/config_loader.rb +373 -0
  26. data/lib/aidp/harness/config_manager.rb +382 -0
  27. data/lib/aidp/harness/config_schema.rb +1006 -0
  28. data/lib/aidp/harness/config_validator.rb +355 -0
  29. data/lib/aidp/harness/configuration.rb +477 -0
  30. data/lib/aidp/harness/enhanced_runner.rb +494 -0
  31. data/lib/aidp/harness/error_handler.rb +616 -0
  32. data/lib/aidp/harness/provider_config.rb +423 -0
  33. data/lib/aidp/harness/provider_factory.rb +306 -0
  34. data/lib/aidp/harness/provider_manager.rb +1269 -0
  35. data/lib/aidp/harness/provider_type_checker.rb +88 -0
  36. data/lib/aidp/harness/runner.rb +411 -0
  37. data/lib/aidp/harness/state/errors.rb +28 -0
  38. data/lib/aidp/harness/state/metrics.rb +219 -0
  39. data/lib/aidp/harness/state/persistence.rb +128 -0
  40. data/lib/aidp/harness/state/provider_state.rb +132 -0
  41. data/lib/aidp/harness/state/ui_state.rb +68 -0
  42. data/lib/aidp/harness/state/workflow_state.rb +123 -0
  43. data/lib/aidp/harness/state_manager.rb +586 -0
  44. data/lib/aidp/harness/status_display.rb +888 -0
  45. data/lib/aidp/harness/ui/base.rb +16 -0
  46. data/lib/aidp/harness/ui/enhanced_tui.rb +545 -0
  47. data/lib/aidp/harness/ui/enhanced_workflow_selector.rb +252 -0
  48. data/lib/aidp/harness/ui/error_handler.rb +132 -0
  49. data/lib/aidp/harness/ui/frame_manager.rb +361 -0
  50. data/lib/aidp/harness/ui/job_monitor.rb +500 -0
  51. data/lib/aidp/harness/ui/navigation/main_menu.rb +311 -0
  52. data/lib/aidp/harness/ui/navigation/menu_formatter.rb +120 -0
  53. data/lib/aidp/harness/ui/navigation/menu_item.rb +142 -0
  54. data/lib/aidp/harness/ui/navigation/menu_state.rb +139 -0
  55. data/lib/aidp/harness/ui/navigation/submenu.rb +202 -0
  56. data/lib/aidp/harness/ui/navigation/workflow_selector.rb +176 -0
  57. data/lib/aidp/harness/ui/progress_display.rb +280 -0
  58. data/lib/aidp/harness/ui/question_collector.rb +141 -0
  59. data/lib/aidp/harness/ui/spinner_group.rb +184 -0
  60. data/lib/aidp/harness/ui/spinner_helper.rb +152 -0
  61. data/lib/aidp/harness/ui/status_manager.rb +312 -0
  62. data/lib/aidp/harness/ui/status_widget.rb +280 -0
  63. data/lib/aidp/harness/ui/workflow_controller.rb +312 -0
  64. data/lib/aidp/harness/user_interface.rb +2381 -0
  65. data/lib/aidp/provider_manager.rb +131 -7
  66. data/lib/aidp/providers/anthropic.rb +28 -103
  67. data/lib/aidp/providers/base.rb +170 -0
  68. data/lib/aidp/providers/cursor.rb +52 -181
  69. data/lib/aidp/providers/gemini.rb +24 -107
  70. data/lib/aidp/providers/macos_ui.rb +99 -5
  71. data/lib/aidp/providers/opencode.rb +194 -0
  72. data/lib/aidp/storage/csv_storage.rb +172 -0
  73. data/lib/aidp/storage/file_manager.rb +214 -0
  74. data/lib/aidp/storage/json_storage.rb +140 -0
  75. data/lib/aidp/version.rb +1 -1
  76. data/lib/aidp.rb +54 -39
  77. data/templates/COMMON/AGENT_BASE.md +11 -0
  78. data/templates/EXECUTE/00_PRD.md +4 -4
  79. data/templates/EXECUTE/02_ARCHITECTURE.md +5 -4
  80. data/templates/EXECUTE/07_TEST_PLAN.md +4 -1
  81. data/templates/EXECUTE/08_TASKS.md +4 -4
  82. data/templates/EXECUTE/10_IMPLEMENTATION_AGENT.md +4 -4
  83. data/templates/README.md +279 -0
  84. data/templates/aidp-development.yml.example +373 -0
  85. data/templates/aidp-minimal.yml.example +48 -0
  86. data/templates/aidp-production.yml.example +475 -0
  87. data/templates/aidp.yml.example +598 -0
  88. metadata +93 -69
  89. data/lib/aidp/analyze/agent_personas.rb +0 -71
  90. data/lib/aidp/analyze/agent_tool_executor.rb +0 -439
  91. data/lib/aidp/analyze/data_retention_manager.rb +0 -421
  92. data/lib/aidp/analyze/database.rb +0 -260
  93. data/lib/aidp/analyze/dependencies.rb +0 -335
  94. data/lib/aidp/analyze/export_manager.rb +0 -418
  95. data/lib/aidp/analyze/focus_guidance.rb +0 -517
  96. data/lib/aidp/analyze/incremental_analyzer.rb +0 -533
  97. data/lib/aidp/analyze/language_analysis_strategies.rb +0 -897
  98. data/lib/aidp/analyze/large_analysis_progress.rb +0 -499
  99. data/lib/aidp/analyze/memory_manager.rb +0 -339
  100. data/lib/aidp/analyze/metrics_storage.rb +0 -336
  101. data/lib/aidp/analyze/parallel_processor.rb +0 -454
  102. data/lib/aidp/analyze/performance_optimizer.rb +0 -691
  103. data/lib/aidp/analyze/repository_chunker.rb +0 -697
  104. data/lib/aidp/analyze/static_analysis_detector.rb +0 -577
  105. data/lib/aidp/analyze/storage.rb +0 -655
  106. data/lib/aidp/analyze/tool_configuration.rb +0 -441
  107. data/lib/aidp/analyze/tool_modernization.rb +0 -750
  108. data/lib/aidp/database/pg_adapter.rb +0 -148
  109. data/lib/aidp/database_config.rb +0 -69
  110. data/lib/aidp/database_connection.rb +0 -72
  111. data/lib/aidp/job_manager.rb +0 -41
  112. data/lib/aidp/jobs/base_job.rb +0 -45
  113. data/lib/aidp/jobs/provider_execution_job.rb +0 -83
  114. data/lib/aidp/project_detector.rb +0 -117
  115. data/lib/aidp/providers/agent_supervisor.rb +0 -348
  116. data/lib/aidp/providers/supervised_base.rb +0 -317
  117. data/lib/aidp/providers/supervised_cursor.rb +0 -22
  118. data/lib/aidp/sync.rb +0 -13
  119. data/lib/aidp/workspace.rb +0 -19
@@ -1,697 +0,0 @@
- # frozen_string_literal: true
-
- require "json"
- require "yaml"
-
- module Aidp
-   module Analyze
-     class RepositoryChunker
-       # Chunking strategies
-       CHUNKING_STRATEGIES = %w[time_based commit_count size_based feature_based].freeze
-
-       # Default chunking configuration
-       DEFAULT_CHUNK_CONFIG = {
-         "time_based" => {
-           "chunk_size" => "30d", # 30 days
-           "overlap" => "7d" # 7 days overlap
-         },
-         "commit_count" => {
-           "chunk_size" => 1000, # 1000 commits per chunk
-           "overlap" => 100 # 100 commits overlap
-         },
-         "size_based" => {
-           "chunk_size" => "100MB", # 100MB per chunk
-           "overlap" => "10MB" # 10MB overlap
-         },
-         "feature_based" => {
-           "max_files_per_chunk" => 500,
-           "max_commits_per_chunk" => 500
-         }
-       }.freeze
-
-       def initialize(project_dir = Dir.pwd, config = {})
-         @project_dir = project_dir
-         @config = config
-         @chunk_config = load_chunk_config
-       end
-
-       # Chunk repository for analysis
-       def chunk_repository(strategy = "time_based", options = {})
-         strategy_config = @chunk_config[strategy] || DEFAULT_CHUNK_CONFIG[strategy]
-
-         case strategy
-         when "time_based"
-           chunk_by_time(strategy_config, options)
-         when "commit_count"
-           chunk_by_commit_count(strategy_config, options)
-         when "size_based"
-           chunk_by_size(strategy_config, options)
-         when "feature_based"
-           chunk_by_features(strategy_config, options)
-         else
-           raise "Unknown chunking strategy: #{strategy}"
-         end
-       end
-
-       # Chunk by time periods
-       def chunk_by_time(config, options = {})
-         chunk_size = parse_time_duration(config["chunk_size"])
-         overlap = parse_time_duration(config["overlap"])
-
-         # Get repository time range
-         time_range = get_repository_time_range
-         return [] if time_range.empty?
-
-         chunks = []
-         current_start = time_range[:start]
-
-         while current_start < time_range[:end]
-           current_end = [current_start + chunk_size, time_range[:end]].min
-
-           chunk = {
-             id: generate_chunk_id("time", current_start),
-             strategy: "time_based",
-             start_time: current_start,
-             end_time: current_end,
-             duration: current_end - current_start,
-             commits: get_commits_in_time_range(current_start, current_end),
-             files: get_files_in_time_range(current_start, current_end),
-             overlap: overlap
-           }
-
-           chunks << chunk
-           current_start = current_end - overlap
-         end
-
-         {
-           strategy: "time_based",
-           total_chunks: chunks.length,
-           chunks: chunks,
-           time_range: time_range,
-           config: config
-         }
-       end
-
-       # Chunk by commit count
-       def chunk_by_commit_count(config, options = {})
-         chunk_size = config["chunk_size"] || 1000
-         overlap = config["overlap"] || 100
-
-         # Get all commits
-         all_commits = get_all_commits
-         return [] if all_commits.empty?
-
-         chunks = []
-         total_commits = all_commits.length
-
-         (0...total_commits).step(chunk_size - overlap) do |start_index|
-           end_index = [start_index + chunk_size, total_commits].min
-
-           chunk_commits = all_commits[start_index...end_index]
-
-           chunk = {
-             id: generate_chunk_id("commit", start_index),
-             strategy: "commit_count",
-             start_index: start_index,
-             end_index: end_index,
-             commit_count: chunk_commits.length,
-             commits: chunk_commits,
-             files: get_files_for_commits(chunk_commits),
-             overlap: overlap
-           }
-
-           chunks << chunk
-         end
-
-         {
-           strategy: "commit_count",
-           total_chunks: chunks.length,
-           chunks: chunks,
-           total_commits: total_commits,
-           config: config
-         }
-       end
-
-       # Chunk by repository size
-       def chunk_by_size(config, options = {})
-         chunk_size = parse_size(config["chunk_size"])
-         parse_size(config["overlap"])
-
-         # Get repository structure
-         repo_structure = analyze_repository_structure
-         return [] if repo_structure.empty?
-
-         chunks = []
-         current_chunk = {
-           id: generate_chunk_id("size", chunks.length),
-           strategy: "size_based",
-           files: [],
-           size: 0,
-           directories: []
-         }
-
-         repo_structure.each do |item|
-           if current_chunk[:size] + item[:size] > chunk_size
-             # Current chunk is full, save it and start new one
-             chunks << current_chunk
-             current_chunk = {
-               id: generate_chunk_id("size", chunks.length),
-               strategy: "size_based",
-               files: [],
-               size: 0,
-               directories: []
-             }
-           end
-
-           current_chunk[:files] << item[:path]
-           current_chunk[:size] += item[:size]
-           current_chunk[:directories] << File.dirname(item[:path])
-         end
-
-         # Add the last chunk if it has content
-         chunks << current_chunk if current_chunk[:files].any?
-
-         {
-           strategy: "size_based",
-           total_chunks: chunks.length,
-           chunks: chunks,
-           total_size: repo_structure.sum { |item| item[:size] },
-           config: config
-         }
-       end
-
-       # Chunk by features/components
-       def chunk_by_features(config, options = {})
-         max_files = config["max_files_per_chunk"] || 500
-         max_commits = config["max_commits_per_chunk"] || 500
-
-         # Identify features/components
-         features = identify_features
-         return [] if features.empty?
-
-         chunks = []
-
-         features.each do |feature|
-           feature_files = get_feature_files(feature)
-           feature_commits = get_feature_commits(feature)
-
-           # Split large features into chunks
-           if feature_files.length > max_files || feature_commits.length > max_commits
-             feature_chunks = split_large_feature(feature, feature_files, feature_commits, max_files, max_commits)
-             chunks.concat(feature_chunks)
-           else
-             chunk = {
-               id: generate_chunk_id("feature", feature[:name]),
-               strategy: "feature_based",
-               feature: feature,
-               files: feature_files,
-               commits: feature_commits,
-               file_count: feature_files.length,
-               commit_count: feature_commits.length
-             }
-             chunks << chunk
-           end
-         end
-
-         {
-           strategy: "feature_based",
-           total_chunks: chunks.length,
-           chunks: chunks,
-           features: features,
-           config: config
-         }
-       end
-
-       # Get chunk analysis plan
-       def get_chunk_analysis_plan(chunks, analysis_type, options = {})
-         plan = {
-           analysis_type: analysis_type,
-           total_chunks: chunks.length,
-           chunks: [],
-           estimated_duration: 0,
-           dependencies: []
-         }
-
-         chunks.each_with_index do |chunk, index|
-           chunk_plan = {
-             chunk_id: chunk[:id],
-             chunk_index: index,
-             strategy: chunk[:strategy],
-             estimated_duration: estimate_chunk_analysis_duration(chunk, analysis_type),
-             dependencies: get_chunk_dependencies(chunk, chunks),
-             priority: calculate_chunk_priority(chunk, analysis_type),
-             resources: estimate_chunk_resources(chunk, analysis_type)
-           }
-
-           plan[:chunks] << chunk_plan
-           plan[:estimated_duration] += chunk_plan[:estimated_duration]
-         end
-
-         # Sort chunks by priority
-         plan[:chunks].sort_by! { |chunk| -chunk[:priority] }
-
-         plan
-       end
-
-       # Execute chunk analysis
-       def execute_chunk_analysis(chunk, analysis_type, options = {})
-         start_time = Time.now
-
-         results = {
-           chunk_id: chunk[:id],
-           analysis_type: analysis_type,
-           start_time: start_time,
-           status: "running"
-         }
-
-         # Perform analysis based on chunk type
-         case chunk[:strategy]
-         when "time_based"
-           results[:data] = analyze_time_chunk(chunk, analysis_type, options)
-         when "commit_count"
-           results[:data] = analyze_commit_chunk(chunk, analysis_type, options)
-         when "size_based"
-           results[:data] = analyze_size_chunk(chunk, analysis_type, options)
-         when "feature_based"
-           results[:data] = analyze_feature_chunk(chunk, analysis_type, options)
-         end
-
-         results[:status] = "completed"
-         results[:end_time] = Time.now
-         results[:duration] = results[:end_time] - results[:start_time]
-
-         results
-       end
-
-       # Merge chunk analysis results
-       def merge_chunk_results(chunk_results, options = {})
-         merged = {
-           total_chunks: chunk_results.length,
-           successful_chunks: chunk_results.count { |r| r[:status] == "completed" },
-           failed_chunks: chunk_results.count { |r| r[:status] == "failed" },
-           total_duration: chunk_results.sum { |r| r[:duration] || 0 },
-           merged_data: {},
-           errors: []
-         }
-
-         # Collect errors
-         chunk_results.each do |result|
-           next unless result[:status] == "failed"
-
-           merged[:errors] << {
-             chunk_id: result[:chunk_id],
-             error: result[:error]
-           }
-         end
-
-         # Merge successful results
-         successful_results = chunk_results.select { |r| r[:status] == "completed" }
-
-         successful_results.each do |result|
-           merge_chunk_data(merged[:merged_data], result[:data])
-         end
-
-         merged
-       end
-
-       # Get chunk statistics
-       def get_chunk_statistics(chunks)
-         return {} if chunks.empty?
-
-         stats = {
-           total_chunks: chunks.length,
-           strategies: chunks.map { |c| c[:strategy] }.tally,
-           total_files: chunks.sum { |c| c[:files]&.length || 0 },
-           total_commits: chunks.sum { |c| c[:commits]&.length || 0 },
-           average_chunk_size: calculate_average_chunk_size(chunks),
-           chunk_distribution: analyze_chunk_distribution(chunks)
-         }
-
-         # Strategy-specific statistics
-         strategies = chunks.map { |c| c[:strategy] }.uniq
-         strategies.each do |strategy|
-           strategy_chunks = chunks.select { |c| c[:strategy] == strategy }
-           stats["#{strategy}_stats"] = get_strategy_statistics(strategy_chunks, strategy)
-         end
-
-         stats
-       end
-
-       private
-
-       def load_chunk_config
-         config_file = File.join(@project_dir, ".aidp-chunk-config.yml")
-
-         if File.exist?(config_file)
-           YAML.load_file(config_file) || DEFAULT_CHUNK_CONFIG
-         else
-           DEFAULT_CHUNK_CONFIG
-         end
-       end
-
-       def parse_time_duration(duration_str)
-         # Parse duration strings like "30d", "7d", "1w", etc.
-         # Use anchored patterns with limited digit repetition to prevent ReDoS
-         case duration_str.to_s.strip
-         when /\A(\d{1,6})d\z/
-           ::Regexp.last_match(1).to_i * 24 * 60 * 60
-         when /\A(\d{1,6})w\z/
-           ::Regexp.last_match(1).to_i * 7 * 24 * 60 * 60
-         when /\A(\d{1,6})m\z/
-           ::Regexp.last_match(1).to_i * 30 * 24 * 60 * 60
-         when /\A(\d{1,6})y\z/
-           ::Regexp.last_match(1).to_i * 365 * 24 * 60 * 60
-         else
-           30 * 24 * 60 * 60 # Default to 30 days
-         end
-       end
-
-       def parse_size(size_str)
-         # Parse size strings like "100MB", "1GB", etc.
-         # Use anchored patterns with limited digit repetition to prevent ReDoS
-         case size_str.to_s.strip
-         when /\A(\d{1,10})KB\z/i
-           ::Regexp.last_match(1).to_i * 1024
-         when /\A(\d{1,10})MB\z/i
-           ::Regexp.last_match(1).to_i * 1024 * 1024
-         when /\A(\d{1,10})GB\z/i
-           ::Regexp.last_match(1).to_i * 1024 * 1024 * 1024
-         else
-           100 * 1024 * 1024 # Default to 100MB
-         end
-       end
-
-       def get_repository_time_range
-         # Get the time range of the repository
-         # This would use git commands to get the first and last commit dates
-         {
-           start: Time.now - (365 * 24 * 60 * 60), # 1 year ago
-           end: Time.now
-         }
-       end
-
-       def get_commits_in_time_range(start_time, end_time)
-         # Get commits within the specified time range
-         # This would use git log with date filtering
-         []
-       end
-
-       def get_files_in_time_range(start_time, end_time)
-         # Get files modified within the specified time range
-         # This would use git log --name-only with date filtering
-         []
-       end
-
-       def get_all_commits
-         # Get all commits in the repository
-         # This would use git log
-         []
-       end
-
-       def get_files_for_commits(commits)
-         # Get files modified in the specified commits
-         # This would use git show --name-only
-         []
-       end
-
-       def analyze_repository_structure
-         # Analyze the repository structure to get file sizes and organization
-         structure = []
-
-         Dir.glob(File.join(@project_dir, "**", "*")).each do |path|
-           next unless File.file?(path)
-
-           relative_path = path.sub(@project_dir + "/", "")
-           structure << {
-             path: relative_path,
-             size: File.size(path),
-             type: File.extname(path)
-           }
-         end
-
-         structure
-       end
-
-       def identify_features
-         # Identify features/components in the repository
-         features = []
-
-         # Look for common feature patterns
-         feature_patterns = [
-           "app/features/**/*",
-           "features/**/*",
-           "src/features/**/*",
-           "lib/features/**/*"
-         ]
-
-         feature_patterns.each do |pattern|
-           Dir.glob(File.join(@project_dir, pattern)).each do |path|
-             next unless Dir.exist?(path)
-
-             feature_name = File.basename(path)
-             features << {
-               name: feature_name,
-               path: path.sub(@project_dir + "/", ""),
-               type: "directory"
-             }
-           end
-         end
-
-         features
-       end
-
-       def get_feature_files(feature)
-         # Get files associated with a feature
-         feature_path = File.join(@project_dir, feature[:path])
-         return [] unless Dir.exist?(feature_path)
-
-         files = []
-         Dir.glob(File.join(feature_path, "**", "*")).each do |path|
-           next unless File.file?(path)
-
-           files << path.sub(@project_dir + "/", "")
-         end
-
-         files
-       end
-
-       def get_feature_commits(feature)
-         # Get commits related to a feature
-         # This would use git log with path filtering
-         []
-       end
-
-       def split_large_feature(feature, files, commits, max_files, max_commits)
-         # Split a large feature into smaller chunks
-         chunks = []
-
-         # Split by files
-         files.each_slice(max_files) do |file_chunk|
-           chunks << {
-             id: generate_chunk_id("feature", "#{feature[:name]}_files_#{chunks.length}"),
-             strategy: "feature_based",
-             feature: feature,
-             files: file_chunk,
-             commits: commits,
-             file_count: file_chunk.length,
-             commit_count: commits.length
-           }
-         end
-
-         chunks
-       end
-
-       def generate_chunk_id(prefix, identifier)
-         timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
-         "#{prefix}_#{identifier}_#{timestamp}"
-       end
-
-       def estimate_chunk_analysis_duration(chunk, analysis_type)
-         # Estimate analysis duration based on chunk size and analysis type
-         base_duration = case analysis_type
-         when "static_analysis"
-           30 # seconds per file
-         when "security_analysis"
-           60 # seconds per file
-         when "performance_analysis"
-           45 # seconds per file
-         else
-           30 # seconds per file
-         end
-
-         file_count = chunk[:files]&.length || 0
-         commit_count = chunk[:commits]&.length || 0
-
-         (file_count + commit_count) * base_duration
-       end
-
-       def get_chunk_dependencies(chunk, all_chunks)
-         # Get dependencies between chunks
-         []
-
-         # This would analyze relationships between chunks
-         # For now, return empty array
-       end
-
-       def calculate_chunk_priority(chunk, analysis_type)
-         # Calculate priority for chunk analysis
-         priority = 0
-
-         # Higher priority for larger chunks
-         priority += chunk[:files]&.length || 0
-         priority += chunk[:commits]&.length || 0
-
-         # Higher priority for certain analysis types
-         case analysis_type
-         when "security_analysis"
-           priority *= 2
-         when "performance_analysis"
-           priority *= 1.5
-         end
-
-         priority
-       end
-
-       def estimate_chunk_resources(chunk, analysis_type)
-         # Estimate resource requirements for chunk analysis
-         {
-           memory: estimate_memory_usage(chunk),
-           cpu: estimate_cpu_usage(chunk, analysis_type),
-           disk: estimate_disk_usage(chunk)
-         }
-       end
-
-       def estimate_memory_usage(chunk)
-         # Estimate memory usage based on chunk size
-         file_count = chunk[:files]&.length || 0
-         commit_count = chunk[:commits]&.length || 0
-
-         (file_count + commit_count) * 1024 * 1024 # 1MB per item
-       end
-
-       def estimate_cpu_usage(chunk, analysis_type)
-         # Estimate CPU usage based on analysis type
-         case analysis_type
-         when "static_analysis"
-           "medium"
-         when "security_analysis"
-           "high"
-         when "performance_analysis"
-           "high"
-         else
-           "low"
-         end
-       end
-
-       def estimate_disk_usage(chunk)
-         # Estimate disk usage for temporary files
-         file_count = chunk[:files]&.length || 0
-         file_count * 1024 * 1024 # 1MB per file for temporary storage
-       end
-
-       def analyze_time_chunk(chunk, analysis_type, options)
-         # Analyze a time-based chunk
-         {
-           time_range: {
-             start: chunk[:start_time],
-             end: chunk[:end_time]
-           },
-           commits: chunk[:commits],
-           files: chunk[:files],
-           analysis_results: {}
-         }
-       end
-
-       def analyze_commit_chunk(chunk, analysis_type, options)
-         # Analyze a commit-based chunk
-         {
-           commit_range: {
-             start: chunk[:start_index],
-             end: chunk[:end_index]
-           },
-           commits: chunk[:commits],
-           files: chunk[:files],
-           analysis_results: {}
-         }
-       end
-
-       def analyze_size_chunk(chunk, analysis_type, options)
-         # Analyze a size-based chunk
-         {
-           size: chunk[:size],
-           files: chunk[:files],
-           directories: chunk[:directories],
-           analysis_results: {}
-         }
-       end
-
-       def analyze_feature_chunk(chunk, analysis_type, options)
-         # Analyze a feature-based chunk
-         {
-           feature: chunk[:feature],
-           files: chunk[:files],
-           commits: chunk[:commits],
-           analysis_results: {}
-         }
-       end
-
-       def merge_chunk_data(merged_data, chunk_data)
-         # Merge data from a chunk into the merged results
-         chunk_data.each do |key, value|
-           if merged_data[key].is_a?(Array) && value.is_a?(Array)
-             merged_data[key].concat(value)
-           elsif merged_data[key].is_a?(Hash) && value.is_a?(Hash)
-             merged_data[key].merge!(value)
-           else
-             merged_data[key] = value
-           end
-         end
-       end
-
-       def calculate_average_chunk_size(chunks)
-         return 0 if chunks.empty?
-
-         total_size = chunks.sum do |chunk|
-           (chunk[:files]&.length || 0) + (chunk[:commits]&.length || 0)
-         end
-
-         total_size.to_f / chunks.length
-       end
-
-       def analyze_chunk_distribution(chunks)
-         # Analyze the distribution of chunk sizes
-         sizes = chunks.map do |chunk|
-           (chunk[:files]&.length || 0) + (chunk[:commits]&.length || 0)
-         end
-
-         {
-           min: sizes.min,
-           max: sizes.max,
-           average: sizes.sum.to_f / sizes.length,
-           median: calculate_median(sizes)
-         }
-       end
-
-       def calculate_median(values)
-         sorted = values.sort
-         length = sorted.length
-
-         if length.odd?
-           sorted[length / 2]
-         else
-           (sorted[length / 2 - 1] + sorted[length / 2]) / 2.0
-         end
-       end
-
-       def get_strategy_statistics(chunks, strategy)
-         # Get statistics for a specific chunking strategy
-         {
-           chunk_count: chunks.length,
-           total_files: chunks.sum { |c| c[:files]&.length || 0 },
-           total_commits: chunks.sum { |c| c[:commits]&.length || 0 },
-           average_size: calculate_average_chunk_size(chunks)
-         }
-       end
-     end
-   end
- end
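
Note on the removed API: RepositoryChunker implemented a four-step pipeline — chunk the repository, build a prioritized analysis plan, execute each chunk, merge the results. Below is a minimal driver sketch reconstructed from the method signatures in the deleted file; the class, its methods, and the "commit_count" / "static_analysis" strings come from the 0.7.0 source, while the wiring itself is illustrative, not code that shipped in the gem.

    require "aidp/analyze/repository_chunker" # load path as it existed in aidp 0.7.0

    chunker = Aidp::Analyze::RepositoryChunker.new(Dir.pwd)

    # 1. Chunk by commit count (defaults: 1000 commits per chunk, 100-commit overlap).
    result = chunker.chunk_repository("commit_count")
    chunks = result.is_a?(Hash) ? result[:chunks] : [] # empty repos return [] rather than a Hash

    # 2. Plan (entries come back sorted by descending priority), then 3. execute in plan order.
    plan = chunker.get_chunk_analysis_plan(chunks, "static_analysis")
    chunk_results = plan[:chunks].map do |chunk_plan|
      chunk = chunks.find { |c| c[:id] == chunk_plan[:chunk_id] }
      chunker.execute_chunk_analysis(chunk, "static_analysis")
    end

    # 4. Merge per-chunk results into a single report.
    merged = chunker.merge_chunk_results(chunk_results)
    puts "#{merged[:successful_chunks]}/#{merged[:total_chunks]} chunks completed"

Worth noting when reading the deletion: the git-facing helpers (get_all_commits, get_commits_in_time_range, get_files_for_commits, get_feature_commits) were stubs returning empty arrays, so the class could plan and merge but never chunked real git history; 0.8.1 removes the file outright.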