aidp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +210 -0
  4. data/bin/aidp +5 -0
  5. data/lib/aidp/analyze/agent_personas.rb +71 -0
  6. data/lib/aidp/analyze/agent_tool_executor.rb +445 -0
  7. data/lib/aidp/analyze/data_retention_manager.rb +426 -0
  8. data/lib/aidp/analyze/database.rb +243 -0
  9. data/lib/aidp/analyze/dependencies.rb +335 -0
  10. data/lib/aidp/analyze/error_handler.rb +486 -0
  11. data/lib/aidp/analyze/export_manager.rb +425 -0
  12. data/lib/aidp/analyze/feature_analyzer.rb +397 -0
  13. data/lib/aidp/analyze/focus_guidance.rb +517 -0
  14. data/lib/aidp/analyze/incremental_analyzer.rb +543 -0
  15. data/lib/aidp/analyze/language_analysis_strategies.rb +897 -0
  16. data/lib/aidp/analyze/large_analysis_progress.rb +504 -0
  17. data/lib/aidp/analyze/memory_manager.rb +365 -0
  18. data/lib/aidp/analyze/parallel_processor.rb +460 -0
  19. data/lib/aidp/analyze/performance_optimizer.rb +694 -0
  20. data/lib/aidp/analyze/prioritizer.rb +402 -0
  21. data/lib/aidp/analyze/progress.rb +75 -0
  22. data/lib/aidp/analyze/progress_visualizer.rb +320 -0
  23. data/lib/aidp/analyze/report_generator.rb +582 -0
  24. data/lib/aidp/analyze/repository_chunker.rb +702 -0
  25. data/lib/aidp/analyze/ruby_maat_integration.rb +572 -0
  26. data/lib/aidp/analyze/runner.rb +245 -0
  27. data/lib/aidp/analyze/static_analysis_detector.rb +577 -0
  28. data/lib/aidp/analyze/steps.rb +53 -0
  29. data/lib/aidp/analyze/storage.rb +600 -0
  30. data/lib/aidp/analyze/tool_configuration.rb +456 -0
  31. data/lib/aidp/analyze/tool_modernization.rb +750 -0
  32. data/lib/aidp/execute/progress.rb +76 -0
  33. data/lib/aidp/execute/runner.rb +135 -0
  34. data/lib/aidp/execute/steps.rb +113 -0
  35. data/lib/aidp/shared/cli.rb +117 -0
  36. data/lib/aidp/shared/config.rb +35 -0
  37. data/lib/aidp/shared/project_detector.rb +119 -0
  38. data/lib/aidp/shared/providers/anthropic.rb +26 -0
  39. data/lib/aidp/shared/providers/base.rb +17 -0
  40. data/lib/aidp/shared/providers/cursor.rb +102 -0
  41. data/lib/aidp/shared/providers/gemini.rb +26 -0
  42. data/lib/aidp/shared/providers/macos_ui.rb +26 -0
  43. data/lib/aidp/shared/sync.rb +15 -0
  44. data/lib/aidp/shared/util.rb +41 -0
  45. data/lib/aidp/shared/version.rb +7 -0
  46. data/lib/aidp/shared/workspace.rb +21 -0
  47. data/lib/aidp.rb +53 -0
  48. data/templates/ANALYZE/01_REPOSITORY_ANALYSIS.md +100 -0
  49. data/templates/ANALYZE/02_ARCHITECTURE_ANALYSIS.md +151 -0
  50. data/templates/ANALYZE/03_TEST_ANALYSIS.md +182 -0
  51. data/templates/ANALYZE/04_FUNCTIONALITY_ANALYSIS.md +200 -0
  52. data/templates/ANALYZE/05_DOCUMENTATION_ANALYSIS.md +202 -0
  53. data/templates/ANALYZE/06_STATIC_ANALYSIS.md +233 -0
  54. data/templates/ANALYZE/07_REFACTORING_RECOMMENDATIONS.md +316 -0
  55. data/templates/COMMON/AGENT_BASE.md +129 -0
  56. data/templates/COMMON/CONVENTIONS.md +19 -0
  57. data/templates/COMMON/TEMPLATES/ADR_TEMPLATE.md +21 -0
  58. data/templates/COMMON/TEMPLATES/DOMAIN_CHARTER.md +27 -0
  59. data/templates/COMMON/TEMPLATES/EVENT_EXAMPLE.yaml +16 -0
  60. data/templates/COMMON/TEMPLATES/MERMAID_C4.md +46 -0
  61. data/templates/COMMON/TEMPLATES/OPENAPI_STUB.yaml +11 -0
  62. data/templates/EXECUTE/00_PRD.md +36 -0
  63. data/templates/EXECUTE/01_NFRS.md +27 -0
  64. data/templates/EXECUTE/02A_ARCH_GATE_QUESTIONS.md +13 -0
  65. data/templates/EXECUTE/02_ARCHITECTURE.md +42 -0
  66. data/templates/EXECUTE/03_ADR_FACTORY.md +22 -0
  67. data/templates/EXECUTE/04_DOMAIN_DECOMPOSITION.md +24 -0
  68. data/templates/EXECUTE/05_CONTRACTS.md +27 -0
  69. data/templates/EXECUTE/06_THREAT_MODEL.md +23 -0
  70. data/templates/EXECUTE/07_TEST_PLAN.md +24 -0
  71. data/templates/EXECUTE/08_TASKS.md +29 -0
  72. data/templates/EXECUTE/09_SCAFFOLDING_DEVEX.md +25 -0
  73. data/templates/EXECUTE/10_IMPLEMENTATION_AGENT.md +30 -0
  74. data/templates/EXECUTE/11_STATIC_ANALYSIS.md +22 -0
  75. data/templates/EXECUTE/12_OBSERVABILITY_SLOS.md +21 -0
  76. data/templates/EXECUTE/13_DELIVERY_ROLLOUT.md +21 -0
  77. data/templates/EXECUTE/14_DOCS_PORTAL.md +23 -0
  78. data/templates/EXECUTE/15_POST_RELEASE.md +25 -0
  79. metadata +301 -0
data/lib/aidp/analyze/repository_chunker.rb
@@ -0,0 +1,702 @@
+ # frozen_string_literal: true
+
+ require "json"
+ require "yaml"
+
+ module Aidp
+   module Analyze
+     class RepositoryChunker
+       # Chunking strategies
+       CHUNKING_STRATEGIES = %w[time_based commit_count size_based feature_based].freeze
+
+       # Default chunking configuration
+       DEFAULT_CHUNK_CONFIG = {
+         "time_based" => {
+           "chunk_size" => "30d", # 30 days
+           "overlap" => "7d" # 7 days overlap
+         },
+         "commit_count" => {
+           "chunk_size" => 1000, # 1000 commits per chunk
+           "overlap" => 100 # 100 commits overlap
+         },
+         "size_based" => {
+           "chunk_size" => "100MB", # 100MB per chunk
+           "overlap" => "10MB" # 10MB overlap
+         },
+         "feature_based" => {
+           "max_files_per_chunk" => 500,
+           "max_commits_per_chunk" => 500
+         }
+       }.freeze
+
+       def initialize(project_dir = Dir.pwd, config = {})
+         @project_dir = project_dir
+         @config = config
+         @chunk_config = load_chunk_config
+       end
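The chunker reads overrides from `.aidp-chunk-config.yml` in the project root (see `load_chunk_config` below); note the file's contents replace `DEFAULT_CHUNK_CONFIG` wholesale rather than being merged into it. A minimal sketch of writing such an override, with illustrative (not default) values:

```ruby
require "yaml"

# Hypothetical override: tighter windows than the shipped defaults.
# Strategies omitted here are lost, since the file is not merged
# with DEFAULT_CHUNK_CONFIG.
override = {
  "time_based" => {"chunk_size" => "14d", "overlap" => "2d"},
  "commit_count" => {"chunk_size" => 500, "overlap" => 50}
}
File.write(".aidp-chunk-config.yml", override.to_yaml)
```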
+
+       # Chunk repository for analysis
+       def chunk_repository(strategy = "time_based", options = {})
+         strategy_config = @chunk_config[strategy] || DEFAULT_CHUNK_CONFIG[strategy]
+
+         case strategy
+         when "time_based"
+           chunk_by_time(strategy_config, options)
+         when "commit_count"
+           chunk_by_commit_count(strategy_config, options)
+         when "size_based"
+           chunk_by_size(strategy_config, options)
+         when "feature_based"
+           chunk_by_features(strategy_config, options)
+         else
+           raise ArgumentError, "Unknown chunking strategy: #{strategy}"
+         end
+       end
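A usage sketch, assuming the gem is on the load path and the git-backed helpers are implemented (several are stubbed to return `[]` in this version, in which case the chunking methods return an empty array instead of a summary hash):

```ruby
require "aidp"

chunker = Aidp::Analyze::RepositoryChunker.new("/path/to/repo")
result = chunker.chunk_repository("commit_count")
result[:chunks].each do |chunk|
  puts "#{chunk[:id]}: #{chunk[:commit_count]} commits"
end
```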
+
+       # Chunk by time periods
+       def chunk_by_time(config, options = {})
+         chunk_size = parse_time_duration(config["chunk_size"])
+         overlap = parse_time_duration(config["overlap"])
+
+         # Get repository time range
+         time_range = get_repository_time_range
+         return [] if time_range.empty?
+
+         chunks = []
+         current_start = time_range[:start]
+
+         while current_start < time_range[:end]
+           current_end = [current_start + chunk_size, time_range[:end]].min
+
+           chunk = {
+             id: generate_chunk_id("time", current_start),
+             strategy: "time_based",
+             start_time: current_start,
+             end_time: current_end,
+             duration: current_end - current_start,
+             commits: get_commits_in_time_range(current_start, current_end),
+             files: get_files_in_time_range(current_start, current_end),
+             overlap: overlap
+           }
+
+           chunks << chunk
+           # Stop once the final window is emitted; stepping back by the
+           # overlap from the end of history would otherwise repeat the
+           # last chunk forever.
+           break if current_end >= time_range[:end]
+
+           current_start = current_end - overlap
+         end
+
+         {
+           strategy: "time_based",
+           total_chunks: chunks.length,
+           chunks: chunks,
+           time_range: time_range,
+           config: config
+         }
+       end
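To see the sliding windows concretely, a standalone sketch of the loop above with assumed values (90 days of history, 30-day chunks, 7-day overlap):

```ruby
day = 24 * 60 * 60
start = Time.utc(2024, 1, 1)
finish = start + 90 * day
chunk_size, overlap = 30 * day, 7 * day

windows = []
cursor = start
while cursor < finish
  window_end = [cursor + chunk_size, finish].min
  windows << [cursor, window_end]
  break if window_end >= finish

  cursor = window_end - overlap
end
windows.map { |s, e| "#{s.strftime("%m-%d")}..#{e.strftime("%m-%d")}" }
# => ["01-01..01-31", "01-24..02-23", "02-16..03-17", "03-10..03-31"]
```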
+
+       # Chunk by commit count
+       def chunk_by_commit_count(config, options = {})
+         chunk_size = config["chunk_size"] || 1000
+         overlap = config["overlap"] || 100
+
+         # Get all commits
+         all_commits = get_all_commits
+         return [] if all_commits.empty?
+
+         chunks = []
+         total_commits = all_commits.length
+
+         # Assumes overlap < chunk_size; otherwise the step would be
+         # zero or negative and Range#step would raise.
+         (0...total_commits).step(chunk_size - overlap) do |start_index|
+           end_index = [start_index + chunk_size, total_commits].min
+
+           chunk_commits = all_commits[start_index...end_index]
+
+           chunk = {
+             id: generate_chunk_id("commit", start_index),
+             strategy: "commit_count",
+             start_index: start_index,
+             end_index: end_index,
+             commit_count: chunk_commits.length,
+             commits: chunk_commits,
+             files: get_files_for_commits(chunk_commits),
+             overlap: overlap
+           }
+
+           chunks << chunk
+         end
+
+         {
+           strategy: "commit_count",
+           total_chunks: chunks.length,
+           chunks: chunks,
+           total_commits: total_commits,
+           config: config
+         }
+       end
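The stride is `chunk_size - overlap`, so consecutive chunks share a tail of commits. A worked illustration with assumed counts:

```ruby
total, chunk_size, overlap = 2500, 1000, 100

(0...total).step(chunk_size - overlap).map do |start|
  [start, [start + chunk_size, total].min]
end
# => [[0, 1000], [900, 1900], [1800, 2500]]
```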
+
+       # Chunk by repository size
+       def chunk_by_size(config, options = {})
+         chunk_size = parse_size(config["chunk_size"])
+         # The configured overlap is parsed but not yet applied: size-based
+         # chunks are packed greedily without overlapping files.
+         parse_size(config["overlap"])
+
+         # Get repository structure
+         repo_structure = analyze_repository_structure
+         return [] if repo_structure.empty?
+
+         chunks = []
+         current_chunk = {
+           id: generate_chunk_id("size", chunks.length),
+           strategy: "size_based",
+           files: [],
+           size: 0,
+           directories: []
+         }
+
+         repo_structure.each do |item|
+           if current_chunk[:size] + item[:size] > chunk_size
+             # Current chunk is full; save it and start a new one
+             chunks << current_chunk
+             current_chunk = {
+               id: generate_chunk_id("size", chunks.length),
+               strategy: "size_based",
+               files: [],
+               size: 0,
+               directories: []
+             }
+           end
+
+           current_chunk[:files] << item[:path]
+           current_chunk[:size] += item[:size]
+           current_chunk[:directories] << File.dirname(item[:path])
+         end
+
+         # Add the last chunk if it has content
+         chunks << current_chunk if current_chunk[:files].any?
+
+         {
+           strategy: "size_based",
+           total_chunks: chunks.length,
+           chunks: chunks,
+           total_size: repo_structure.sum { |item| item[:size] },
+           config: config
+         }
+       end
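The packing is greedy and order-dependent: each file joins the current chunk until adding it would exceed the limit. A standalone sketch with assumed file sizes:

```ruby
mb = 1024 * 1024
sizes = [40, 70, 30, 90, 20].map { |n| n * mb }
limit = 100 * mb

bins = sizes.each_with_object([[]]) do |size, acc|
  acc << [] if acc.last.sum + size > limit
  acc.last << size
end
bins.map { |bin| bin.sum / mb }
# => [40, 100, 90, 20]  (bins: [40], [70, 30], [90], [20])
```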
+
+       # Chunk by features/components
+       def chunk_by_features(config, options = {})
+         max_files = config["max_files_per_chunk"] || 500
+         max_commits = config["max_commits_per_chunk"] || 500
+
+         # Identify features/components
+         features = identify_features
+         return [] if features.empty?
+
+         chunks = []
+
+         features.each do |feature|
+           feature_files = get_feature_files(feature)
+           feature_commits = get_feature_commits(feature)
+
+           # Split large features into chunks
+           if feature_files.length > max_files || feature_commits.length > max_commits
+             feature_chunks = split_large_feature(feature, feature_files, feature_commits, max_files, max_commits)
+             chunks.concat(feature_chunks)
+           else
+             chunk = {
+               id: generate_chunk_id("feature", feature[:name]),
+               strategy: "feature_based",
+               feature: feature,
+               files: feature_files,
+               commits: feature_commits,
+               file_count: feature_files.length,
+               commit_count: feature_commits.length
+             }
+             chunks << chunk
+           end
+         end
+
+         {
+           strategy: "feature_based",
+           total_chunks: chunks.length,
+           chunks: chunks,
+           features: features,
+           config: config
+         }
+       end
+
+       # Get chunk analysis plan
+       def get_chunk_analysis_plan(chunks, analysis_type, options = {})
+         plan = {
+           analysis_type: analysis_type,
+           total_chunks: chunks.length,
+           chunks: [],
+           estimated_duration: 0,
+           dependencies: []
+         }
+
+         chunks.each_with_index do |chunk, index|
+           chunk_plan = {
+             chunk_id: chunk[:id],
+             chunk_index: index,
+             strategy: chunk[:strategy],
+             estimated_duration: estimate_chunk_analysis_duration(chunk, analysis_type),
+             dependencies: get_chunk_dependencies(chunk, chunks),
+             priority: calculate_chunk_priority(chunk, analysis_type),
+             resources: estimate_chunk_resources(chunk, analysis_type)
+           }
+
+           plan[:chunks] << chunk_plan
+           plan[:estimated_duration] += chunk_plan[:estimated_duration]
+         end
+
+         # Sort chunks by priority, highest first
+         plan[:chunks].sort_by! { |chunk| -chunk[:priority] }
+
+         plan
+       end
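Continuing the earlier sketch: build a plan for the produced chunks and inspect the highest-priority entries (security analysis doubles each chunk's priority, per `calculate_chunk_priority` below):

```ruby
plan = chunker.get_chunk_analysis_plan(result[:chunks], "security_analysis")
puts "estimated total: #{plan[:estimated_duration]}s"
plan[:chunks].first(3).each do |entry|
  puts "#{entry[:chunk_id]} priority=#{entry[:priority]} cpu=#{entry[:resources][:cpu]}"
end
```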
+
+       # Execute chunk analysis
+       def execute_chunk_analysis(chunk, analysis_type, options = {})
+         start_time = Time.now
+
+         results = {
+           chunk_id: chunk[:id],
+           analysis_type: analysis_type,
+           start_time: start_time,
+           status: "running"
+         }
+
+         begin
+           # Perform analysis based on chunk type
+           case chunk[:strategy]
+           when "time_based"
+             results[:data] = analyze_time_chunk(chunk, analysis_type, options)
+           when "commit_count"
+             results[:data] = analyze_commit_chunk(chunk, analysis_type, options)
+           when "size_based"
+             results[:data] = analyze_size_chunk(chunk, analysis_type, options)
+           when "feature_based"
+             results[:data] = analyze_feature_chunk(chunk, analysis_type, options)
+           end
+
+           results[:status] = "completed"
+           results[:end_time] = Time.now
+           results[:duration] = results[:end_time] - results[:start_time]
+         rescue => e
+           results[:status] = "failed"
+           results[:error] = e.message
+           results[:end_time] = Time.now
+           results[:duration] = results[:end_time] - results[:start_time]
+         end
+
+         results
+       end
+
+       # Merge chunk analysis results
+       def merge_chunk_results(chunk_results, options = {})
+         merged = {
+           total_chunks: chunk_results.length,
+           successful_chunks: chunk_results.count { |r| r[:status] == "completed" },
+           failed_chunks: chunk_results.count { |r| r[:status] == "failed" },
+           total_duration: chunk_results.sum { |r| r[:duration] || 0 },
+           merged_data: {},
+           errors: []
+         }
+
+         # Collect errors
+         chunk_results.each do |result|
+           next unless result[:status] == "failed"
+
+           merged[:errors] << {
+             chunk_id: result[:chunk_id],
+             error: result[:error]
+           }
+         end
+
+         # Merge successful results
+         successful_results = chunk_results.select { |r| r[:status] == "completed" }
+
+         successful_results.each do |result|
+           merge_chunk_data(merged[:merged_data], result[:data])
+         end
+
+         merged
+       end
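Tying the pieces together, a sketch that runs every chunk sequentially and folds the results (the gem also ships `parallel_processor.rb`, which suggests a concurrent path exists as well):

```ruby
chunk_results = result[:chunks].map do |chunk|
  chunker.execute_chunk_analysis(chunk, "static_analysis")
end

merged = chunker.merge_chunk_results(chunk_results)
puts "#{merged[:successful_chunks]}/#{merged[:total_chunks]} chunks completed"
merged[:errors].each { |err| warn "#{err[:chunk_id]}: #{err[:error]}" }
```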
+
+       # Get chunk statistics
+       def get_chunk_statistics(chunks)
+         return {} if chunks.empty?
+
+         stats = {
+           total_chunks: chunks.length,
+           strategies: chunks.map { |c| c[:strategy] }.tally,
+           total_files: chunks.sum { |c| c[:files]&.length || 0 },
+           total_commits: chunks.sum { |c| c[:commits]&.length || 0 },
+           average_chunk_size: calculate_average_chunk_size(chunks),
+           chunk_distribution: analyze_chunk_distribution(chunks)
+         }
+
+         # Strategy-specific statistics
+         strategies = chunks.map { |c| c[:strategy] }.uniq
+         strategies.each do |strategy|
+           strategy_chunks = chunks.select { |c| c[:strategy] == strategy }
+           stats["#{strategy}_stats"] = get_strategy_statistics(strategy_chunks, strategy)
+         end
+
+         stats
+       end
+
+       private
+
+       def load_chunk_config
+         config_file = File.join(@project_dir, ".aidp-chunk-config.yml")
+
+         if File.exist?(config_file)
+           YAML.load_file(config_file) || DEFAULT_CHUNK_CONFIG
+         else
+           DEFAULT_CHUNK_CONFIG
+         end
+       end
+
+       def parse_time_duration(duration_str)
+         # Parse duration strings like "30d", "7d", "1w", etc.
+         # Note: "m" is treated as months (~30 days), not minutes.
+         case duration_str
+         when /(\d+)d/
+           ::Regexp.last_match(1).to_i * 24 * 60 * 60
+         when /(\d+)w/
+           ::Regexp.last_match(1).to_i * 7 * 24 * 60 * 60
+         when /(\d+)m/
+           ::Regexp.last_match(1).to_i * 30 * 24 * 60 * 60
+         when /(\d+)y/
+           ::Regexp.last_match(1).to_i * 365 * 24 * 60 * 60
+         else
+           30 * 24 * 60 * 60 # Default to 30 days
+         end
+       end
+
+       def parse_size(size_str)
+         # Parse size strings like "100MB", "1GB", etc.
+         case size_str
+         when /(\d+)KB/i
+           ::Regexp.last_match(1).to_i * 1024
+         when /(\d+)MB/i
+           ::Regexp.last_match(1).to_i * 1024 * 1024
+         when /(\d+)GB/i
+           ::Regexp.last_match(1).to_i * 1024 * 1024 * 1024
+         else
+           100 * 1024 * 1024 # Default to 100MB
+         end
+       end
+
+       def get_repository_time_range
+         # Get the time range of the repository.
+         # A real implementation would use git to read the first and last
+         # commit dates; this stub assumes one year of history ending now.
+         {
+           start: Time.now - (365 * 24 * 60 * 60), # 1 year ago
+           end: Time.now
+         }
+       end
+
+       def get_commits_in_time_range(start_time, end_time)
+         # Get commits within the specified time range
+         # (would use git log with date filtering; stubbed for now)
+         []
+       end
+
+       def get_files_in_time_range(start_time, end_time)
+         # Get files modified within the specified time range
+         # (would use git log --name-only with date filtering; stubbed for now)
+         []
+       end
+
+       def get_all_commits
+         # Get all commits in the repository (would use git log; stubbed for now)
+         []
+       end
+
+       def get_files_for_commits(commits)
+         # Get files modified in the specified commits
+         # (would use git show --name-only; stubbed for now)
+         []
+       end
+
+       def analyze_repository_structure
+         # Analyze the repository structure to get file sizes and organization
+         structure = []
+
+         Dir.glob(File.join(@project_dir, "**", "*")).each do |path|
+           next unless File.file?(path)
+
+           relative_path = path.sub(@project_dir + "/", "")
+           structure << {
+             path: relative_path,
+             size: File.size(path),
+             type: File.extname(path)
+           }
+         end
+
+         structure
+       end
+
+       def identify_features
+         # Identify features/components in the repository
+         features = []
+
+         # Look for common feature directory patterns
+         feature_patterns = [
+           "app/features/**/*",
+           "features/**/*",
+           "src/features/**/*",
+           "lib/features/**/*"
+         ]
+
+         feature_patterns.each do |pattern|
+           Dir.glob(File.join(@project_dir, pattern)).each do |path|
+             next unless Dir.exist?(path)
+
+             feature_name = File.basename(path)
+             features << {
+               name: feature_name,
+               path: path.sub(@project_dir + "/", ""),
+               type: "directory"
+             }
+           end
+         end
+
+         features
+       end
+
+       def get_feature_files(feature)
+         # Get files associated with a feature
+         feature_path = File.join(@project_dir, feature[:path])
+         return [] unless Dir.exist?(feature_path)
+
+         files = []
+         Dir.glob(File.join(feature_path, "**", "*")).each do |path|
+           next unless File.file?(path)
+
+           files << path.sub(@project_dir + "/", "")
+         end
+
+         files
+       end
+
+       def get_feature_commits(feature)
+         # Get commits related to a feature
+         # (would use git log with path filtering; stubbed for now)
+         []
+       end
+
+       def split_large_feature(feature, files, commits, max_files, max_commits)
+         # Split a large feature into smaller chunks.
+         # Currently splits by files only; the full commit list is repeated
+         # in each sub-chunk, so max_commits is not yet enforced here.
+         chunks = []
+
+         files.each_slice(max_files) do |file_chunk|
+           chunks << {
+             id: generate_chunk_id("feature", "#{feature[:name]}_files_#{chunks.length}"),
+             strategy: "feature_based",
+             feature: feature,
+             files: file_chunk,
+             commits: commits,
+             file_count: file_chunk.length,
+             commit_count: commits.length
+           }
+         end
+
+         chunks
+       end
+
+       def generate_chunk_id(prefix, identifier)
+         timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
+         "#{prefix}_#{identifier}_#{timestamp}"
+       end
+
+       def estimate_chunk_analysis_duration(chunk, analysis_type)
+         # Estimate analysis duration based on chunk size and analysis type
+         base_duration = case analysis_type
+         when "static_analysis"
+           30 # seconds per item
+         when "security_analysis"
+           60 # seconds per item
+         when "performance_analysis"
+           45 # seconds per item
+         else
+           30 # seconds per item
+         end
+
+         file_count = chunk[:files]&.length || 0
+         commit_count = chunk[:commits]&.length || 0
+
+         (file_count + commit_count) * base_duration
+       end
+
+       def get_chunk_dependencies(chunk, all_chunks)
+         # Get dependencies between chunks.
+         # This would analyze relationships between chunks;
+         # for now, return an empty array.
+         []
+       end
+
+       def calculate_chunk_priority(chunk, analysis_type)
+         # Calculate priority for chunk analysis
+         priority = 0
+
+         # Higher priority for larger chunks
+         priority += chunk[:files]&.length || 0
+         priority += chunk[:commits]&.length || 0
+
+         # Higher priority for certain analysis types
+         case analysis_type
+         when "security_analysis"
+           priority *= 2
+         when "performance_analysis"
+           priority *= 1.5
+         end
+
+         priority
+       end
+
+       def estimate_chunk_resources(chunk, analysis_type)
+         # Estimate resource requirements for chunk analysis
+         {
+           memory: estimate_memory_usage(chunk),
+           cpu: estimate_cpu_usage(chunk, analysis_type),
+           disk: estimate_disk_usage(chunk)
+         }
+       end
+
+       def estimate_memory_usage(chunk)
+         # Estimate memory usage based on chunk size
+         file_count = chunk[:files]&.length || 0
+         commit_count = chunk[:commits]&.length || 0
+
+         (file_count + commit_count) * 1024 * 1024 # 1MB per item
+       end
+
+       def estimate_cpu_usage(chunk, analysis_type)
+         # Estimate CPU usage based on analysis type
+         case analysis_type
+         when "static_analysis"
+           "medium"
+         when "security_analysis", "performance_analysis"
+           "high"
+         else
+           "low"
+         end
+       end
+
+       def estimate_disk_usage(chunk)
+         # Estimate disk usage for temporary files
+         file_count = chunk[:files]&.length || 0
+         file_count * 1024 * 1024 # 1MB per file for temporary storage
+       end
+
+       def analyze_time_chunk(chunk, analysis_type, options)
+         # Analyze a time-based chunk
+         {
+           time_range: {
+             start: chunk[:start_time],
+             end: chunk[:end_time]
+           },
+           commits: chunk[:commits],
+           files: chunk[:files],
+           analysis_results: {}
+         }
+       end
+
+       def analyze_commit_chunk(chunk, analysis_type, options)
+         # Analyze a commit-based chunk
+         {
+           commit_range: {
+             start: chunk[:start_index],
+             end: chunk[:end_index]
+           },
+           commits: chunk[:commits],
+           files: chunk[:files],
+           analysis_results: {}
+         }
+       end
+
+       def analyze_size_chunk(chunk, analysis_type, options)
+         # Analyze a size-based chunk
+         {
+           size: chunk[:size],
+           files: chunk[:files],
+           directories: chunk[:directories],
+           analysis_results: {}
+         }
+       end
+
+       def analyze_feature_chunk(chunk, analysis_type, options)
+         # Analyze a feature-based chunk
+         {
+           feature: chunk[:feature],
+           files: chunk[:files],
+           commits: chunk[:commits],
+           analysis_results: {}
+         }
+       end
+
+       def merge_chunk_data(merged_data, chunk_data)
+         # Merge data from a chunk into the merged results
+         chunk_data.each do |key, value|
+           if merged_data[key].is_a?(Array) && value.is_a?(Array)
+             merged_data[key].concat(value)
+           elsif merged_data[key].is_a?(Hash) && value.is_a?(Hash)
+             merged_data[key].merge!(value)
+           else
+             merged_data[key] = value
+           end
+         end
+       end
+
+       def calculate_average_chunk_size(chunks)
+         return 0 if chunks.empty?
+
+         total_size = chunks.sum do |chunk|
+           (chunk[:files]&.length || 0) + (chunk[:commits]&.length || 0)
+         end
+
+         total_size.to_f / chunks.length
+       end
+
+       def analyze_chunk_distribution(chunks)
+         # Analyze the distribution of chunk sizes
+         sizes = chunks.map do |chunk|
+           (chunk[:files]&.length || 0) + (chunk[:commits]&.length || 0)
+         end
+
+         {
+           min: sizes.min,
+           max: sizes.max,
+           average: sizes.sum.to_f / sizes.length,
+           median: calculate_median(sizes)
+         }
+       end
+
+       def calculate_median(values)
+         sorted = values.sort
+         length = sorted.length
+
+         if length.odd?
+           sorted[length / 2]
+         else
+           (sorted[length / 2 - 1] + sorted[length / 2]) / 2.0
+         end
+       end
+
+       def get_strategy_statistics(chunks, strategy)
+         # Get statistics for a specific chunking strategy
+         {
+           chunk_count: chunks.length,
+           total_files: chunks.sum { |c| c[:files]&.length || 0 },
+           total_commits: chunks.sum { |c| c[:commits]&.length || 0 },
+           average_size: calculate_average_chunk_size(chunks)
+         }
+       end
+     end
+   end
+ end