aidp 0.5.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. checksums.yaml +4 -4
  2. data/README.md +128 -151
  3. data/bin/aidp +1 -1
  4. data/lib/aidp/analysis/kb_inspector.rb +471 -0
  5. data/lib/aidp/analysis/seams.rb +159 -0
  6. data/lib/aidp/analysis/tree_sitter_grammar_loader.rb +480 -0
  7. data/lib/aidp/analysis/tree_sitter_scan.rb +686 -0
  8. data/lib/aidp/analyze/error_handler.rb +2 -78
  9. data/lib/aidp/analyze/json_file_storage.rb +292 -0
  10. data/lib/aidp/analyze/progress.rb +12 -0
  11. data/lib/aidp/analyze/progress_visualizer.rb +12 -17
  12. data/lib/aidp/analyze/ruby_maat_integration.rb +13 -31
  13. data/lib/aidp/analyze/runner.rb +256 -87
  14. data/lib/aidp/analyze/steps.rb +6 -0
  15. data/lib/aidp/cli/jobs_command.rb +103 -435
  16. data/lib/aidp/cli.rb +317 -191
  17. data/lib/aidp/config.rb +298 -10
  18. data/lib/aidp/debug_logger.rb +195 -0
  19. data/lib/aidp/debug_mixin.rb +187 -0
  20. data/lib/aidp/execute/progress.rb +9 -0
  21. data/lib/aidp/execute/runner.rb +221 -40
  22. data/lib/aidp/execute/steps.rb +17 -7
  23. data/lib/aidp/execute/workflow_selector.rb +211 -0
  24. data/lib/aidp/harness/completion_checker.rb +268 -0
  25. data/lib/aidp/harness/condition_detector.rb +1526 -0
  26. data/lib/aidp/harness/config_loader.rb +373 -0
  27. data/lib/aidp/harness/config_manager.rb +382 -0
  28. data/lib/aidp/harness/config_schema.rb +1006 -0
  29. data/lib/aidp/harness/config_validator.rb +355 -0
  30. data/lib/aidp/harness/configuration.rb +477 -0
  31. data/lib/aidp/harness/enhanced_runner.rb +494 -0
  32. data/lib/aidp/harness/error_handler.rb +616 -0
  33. data/lib/aidp/harness/provider_config.rb +423 -0
  34. data/lib/aidp/harness/provider_factory.rb +306 -0
  35. data/lib/aidp/harness/provider_manager.rb +1269 -0
  36. data/lib/aidp/harness/provider_type_checker.rb +88 -0
  37. data/lib/aidp/harness/runner.rb +411 -0
  38. data/lib/aidp/harness/state/errors.rb +28 -0
  39. data/lib/aidp/harness/state/metrics.rb +219 -0
  40. data/lib/aidp/harness/state/persistence.rb +128 -0
  41. data/lib/aidp/harness/state/provider_state.rb +132 -0
  42. data/lib/aidp/harness/state/ui_state.rb +68 -0
  43. data/lib/aidp/harness/state/workflow_state.rb +123 -0
  44. data/lib/aidp/harness/state_manager.rb +586 -0
  45. data/lib/aidp/harness/status_display.rb +888 -0
  46. data/lib/aidp/harness/ui/base.rb +16 -0
  47. data/lib/aidp/harness/ui/enhanced_tui.rb +545 -0
  48. data/lib/aidp/harness/ui/enhanced_workflow_selector.rb +252 -0
  49. data/lib/aidp/harness/ui/error_handler.rb +132 -0
  50. data/lib/aidp/harness/ui/frame_manager.rb +361 -0
  51. data/lib/aidp/harness/ui/job_monitor.rb +500 -0
  52. data/lib/aidp/harness/ui/navigation/main_menu.rb +311 -0
  53. data/lib/aidp/harness/ui/navigation/menu_formatter.rb +120 -0
  54. data/lib/aidp/harness/ui/navigation/menu_item.rb +142 -0
  55. data/lib/aidp/harness/ui/navigation/menu_state.rb +139 -0
  56. data/lib/aidp/harness/ui/navigation/submenu.rb +202 -0
  57. data/lib/aidp/harness/ui/navigation/workflow_selector.rb +176 -0
  58. data/lib/aidp/harness/ui/progress_display.rb +280 -0
  59. data/lib/aidp/harness/ui/question_collector.rb +141 -0
  60. data/lib/aidp/harness/ui/spinner_group.rb +184 -0
  61. data/lib/aidp/harness/ui/spinner_helper.rb +152 -0
  62. data/lib/aidp/harness/ui/status_manager.rb +312 -0
  63. data/lib/aidp/harness/ui/status_widget.rb +280 -0
  64. data/lib/aidp/harness/ui/workflow_controller.rb +312 -0
  65. data/lib/aidp/harness/user_interface.rb +2381 -0
  66. data/lib/aidp/provider_manager.rb +131 -7
  67. data/lib/aidp/providers/anthropic.rb +28 -109
  68. data/lib/aidp/providers/base.rb +170 -0
  69. data/lib/aidp/providers/cursor.rb +52 -183
  70. data/lib/aidp/providers/gemini.rb +24 -109
  71. data/lib/aidp/providers/macos_ui.rb +99 -5
  72. data/lib/aidp/providers/opencode.rb +194 -0
  73. data/lib/aidp/storage/csv_storage.rb +172 -0
  74. data/lib/aidp/storage/file_manager.rb +214 -0
  75. data/lib/aidp/storage/json_storage.rb +140 -0
  76. data/lib/aidp/version.rb +1 -1
  77. data/lib/aidp.rb +56 -35
  78. data/templates/ANALYZE/06a_tree_sitter_scan.md +217 -0
  79. data/templates/COMMON/AGENT_BASE.md +11 -0
  80. data/templates/EXECUTE/00_PRD.md +4 -4
  81. data/templates/EXECUTE/02_ARCHITECTURE.md +5 -4
  82. data/templates/EXECUTE/07_TEST_PLAN.md +4 -1
  83. data/templates/EXECUTE/08_TASKS.md +4 -4
  84. data/templates/EXECUTE/10_IMPLEMENTATION_AGENT.md +4 -4
  85. data/templates/README.md +279 -0
  86. data/templates/aidp-development.yml.example +373 -0
  87. data/templates/aidp-minimal.yml.example +48 -0
  88. data/templates/aidp-production.yml.example +475 -0
  89. data/templates/aidp.yml.example +598 -0
  90. metadata +106 -64
  91. data/lib/aidp/analyze/agent_personas.rb +0 -71
  92. data/lib/aidp/analyze/agent_tool_executor.rb +0 -445
  93. data/lib/aidp/analyze/data_retention_manager.rb +0 -426
  94. data/lib/aidp/analyze/database.rb +0 -260
  95. data/lib/aidp/analyze/dependencies.rb +0 -335
  96. data/lib/aidp/analyze/export_manager.rb +0 -425
  97. data/lib/aidp/analyze/focus_guidance.rb +0 -517
  98. data/lib/aidp/analyze/incremental_analyzer.rb +0 -543
  99. data/lib/aidp/analyze/language_analysis_strategies.rb +0 -897
  100. data/lib/aidp/analyze/large_analysis_progress.rb +0 -504
  101. data/lib/aidp/analyze/memory_manager.rb +0 -365
  102. data/lib/aidp/analyze/metrics_storage.rb +0 -336
  103. data/lib/aidp/analyze/parallel_processor.rb +0 -460
  104. data/lib/aidp/analyze/performance_optimizer.rb +0 -694
  105. data/lib/aidp/analyze/repository_chunker.rb +0 -704
  106. data/lib/aidp/analyze/static_analysis_detector.rb +0 -577
  107. data/lib/aidp/analyze/storage.rb +0 -662
  108. data/lib/aidp/analyze/tool_configuration.rb +0 -456
  109. data/lib/aidp/analyze/tool_modernization.rb +0 -750
  110. data/lib/aidp/database/pg_adapter.rb +0 -148
  111. data/lib/aidp/database_config.rb +0 -69
  112. data/lib/aidp/database_connection.rb +0 -72
  113. data/lib/aidp/database_migration.rb +0 -158
  114. data/lib/aidp/job_manager.rb +0 -41
  115. data/lib/aidp/jobs/base_job.rb +0 -47
  116. data/lib/aidp/jobs/provider_execution_job.rb +0 -96
  117. data/lib/aidp/project_detector.rb +0 -117
  118. data/lib/aidp/providers/agent_supervisor.rb +0 -348
  119. data/lib/aidp/providers/supervised_base.rb +0 -317
  120. data/lib/aidp/providers/supervised_cursor.rb +0 -22
  121. data/lib/aidp/sync.rb +0 -13
  122. data/lib/aidp/workspace.rb +0 -19
data/lib/aidp/analyze/repository_chunker.rb
@@ -1,704 +0,0 @@
- # frozen_string_literal: true
-
- require "json"
- require "yaml"
-
- module Aidp
-   module Analyze
-     class RepositoryChunker
-       # Chunking strategies
-       CHUNKING_STRATEGIES = %w[time_based commit_count size_based feature_based].freeze
-
-       # Default chunking configuration
-       DEFAULT_CHUNK_CONFIG = {
-         "time_based" => {
-           "chunk_size" => "30d", # 30 days
-           "overlap" => "7d" # 7 days overlap
-         },
-         "commit_count" => {
-           "chunk_size" => 1000, # 1000 commits per chunk
-           "overlap" => 100 # 100 commits overlap
-         },
-         "size_based" => {
-           "chunk_size" => "100MB", # 100MB per chunk
-           "overlap" => "10MB" # 10MB overlap
-         },
-         "feature_based" => {
-           "max_files_per_chunk" => 500,
-           "max_commits_per_chunk" => 500
-         }
-       }.freeze
-
-       def initialize(project_dir = Dir.pwd, config = {})
-         @project_dir = project_dir
-         @config = config
-         @chunk_config = load_chunk_config
-       end
-
-       # Chunk repository for analysis
-       def chunk_repository(strategy = "time_based", options = {})
-         strategy_config = @chunk_config[strategy] || DEFAULT_CHUNK_CONFIG[strategy]
-
-         case strategy
-         when "time_based"
-           chunk_by_time(strategy_config, options)
-         when "commit_count"
-           chunk_by_commit_count(strategy_config, options)
-         when "size_based"
-           chunk_by_size(strategy_config, options)
-         when "feature_based"
-           chunk_by_features(strategy_config, options)
-         else
-           raise "Unknown chunking strategy: #{strategy}"
-         end
-       end
-
-       # Chunk by time periods
-       def chunk_by_time(config, options = {})
-         chunk_size = parse_time_duration(config["chunk_size"])
-         overlap = parse_time_duration(config["overlap"])
-
-         # Get repository time range
-         time_range = get_repository_time_range
-         return [] if time_range.empty?
-
-         chunks = []
-         current_start = time_range[:start]
-
-         while current_start < time_range[:end]
-           current_end = [current_start + chunk_size, time_range[:end]].min
-
-           chunk = {
-             id: generate_chunk_id("time", current_start),
-             strategy: "time_based",
-             start_time: current_start,
-             end_time: current_end,
-             duration: current_end - current_start,
-             commits: get_commits_in_time_range(current_start, current_end),
-             files: get_files_in_time_range(current_start, current_end),
-             overlap: overlap
-           }
-
-           chunks << chunk
-           current_start = current_end - overlap
-         end
-
-         {
-           strategy: "time_based",
-           total_chunks: chunks.length,
-           chunks: chunks,
-           time_range: time_range,
-           config: config
-         }
-       end
-
-       # Chunk by commit count
-       def chunk_by_commit_count(config, options = {})
-         chunk_size = config["chunk_size"] || 1000
-         overlap = config["overlap"] || 100
-
-         # Get all commits
-         all_commits = get_all_commits
-         return [] if all_commits.empty?
-
-         chunks = []
-         total_commits = all_commits.length
-
-         (0...total_commits).step(chunk_size - overlap) do |start_index|
-           end_index = [start_index + chunk_size, total_commits].min
-
-           chunk_commits = all_commits[start_index...end_index]
-
-           chunk = {
-             id: generate_chunk_id("commit", start_index),
-             strategy: "commit_count",
-             start_index: start_index,
-             end_index: end_index,
-             commit_count: chunk_commits.length,
-             commits: chunk_commits,
-             files: get_files_for_commits(chunk_commits),
-             overlap: overlap
-           }
-
-           chunks << chunk
-         end
-
-         {
-           strategy: "commit_count",
-           total_chunks: chunks.length,
-           chunks: chunks,
-           total_commits: total_commits,
-           config: config
-         }
-       end
-
-       # Chunk by repository size
-       def chunk_by_size(config, options = {})
-         chunk_size = parse_size(config["chunk_size"])
-         parse_size(config["overlap"])
-
-         # Get repository structure
-         repo_structure = analyze_repository_structure
-         return [] if repo_structure.empty?
-
-         chunks = []
-         current_chunk = {
-           id: generate_chunk_id("size", chunks.length),
-           strategy: "size_based",
-           files: [],
-           size: 0,
-           directories: []
-         }
-
-         repo_structure.each do |item|
-           if current_chunk[:size] + item[:size] > chunk_size
-             # Current chunk is full, save it and start new one
-             chunks << current_chunk
-             current_chunk = {
-               id: generate_chunk_id("size", chunks.length),
-               strategy: "size_based",
-               files: [],
-               size: 0,
-               directories: []
-             }
-           end
-
-           current_chunk[:files] << item[:path]
-           current_chunk[:size] += item[:size]
-           current_chunk[:directories] << File.dirname(item[:path])
-         end
-
-         # Add the last chunk if it has content
-         chunks << current_chunk if current_chunk[:files].any?
-
-         {
-           strategy: "size_based",
-           total_chunks: chunks.length,
-           chunks: chunks,
-           total_size: repo_structure.sum { |item| item[:size] },
-           config: config
-         }
-       end
-
-       # Chunk by features/components
-       def chunk_by_features(config, options = {})
-         max_files = config["max_files_per_chunk"] || 500
-         max_commits = config["max_commits_per_chunk"] || 500
-
-         # Identify features/components
-         features = identify_features
-         return [] if features.empty?
-
-         chunks = []
-
-         features.each do |feature|
-           feature_files = get_feature_files(feature)
-           feature_commits = get_feature_commits(feature)
-
-           # Split large features into chunks
-           if feature_files.length > max_files || feature_commits.length > max_commits
-             feature_chunks = split_large_feature(feature, feature_files, feature_commits, max_files, max_commits)
-             chunks.concat(feature_chunks)
-           else
-             chunk = {
-               id: generate_chunk_id("feature", feature[:name]),
-               strategy: "feature_based",
-               feature: feature,
-               files: feature_files,
-               commits: feature_commits,
-               file_count: feature_files.length,
-               commit_count: feature_commits.length
-             }
-             chunks << chunk
-           end
-         end
-
-         {
-           strategy: "feature_based",
-           total_chunks: chunks.length,
-           chunks: chunks,
-           features: features,
-           config: config
-         }
-       end
-
-       # Get chunk analysis plan
-       def get_chunk_analysis_plan(chunks, analysis_type, options = {})
-         plan = {
-           analysis_type: analysis_type,
-           total_chunks: chunks.length,
-           chunks: [],
-           estimated_duration: 0,
-           dependencies: []
-         }
-
-         chunks.each_with_index do |chunk, index|
-           chunk_plan = {
-             chunk_id: chunk[:id],
-             chunk_index: index,
-             strategy: chunk[:strategy],
-             estimated_duration: estimate_chunk_analysis_duration(chunk, analysis_type),
-             dependencies: get_chunk_dependencies(chunk, chunks),
-             priority: calculate_chunk_priority(chunk, analysis_type),
-             resources: estimate_chunk_resources(chunk, analysis_type)
-           }
-
-           plan[:chunks] << chunk_plan
-           plan[:estimated_duration] += chunk_plan[:estimated_duration]
-         end
-
-         # Sort chunks by priority
-         plan[:chunks].sort_by! { |chunk| -chunk[:priority] }
-
-         plan
-       end
-
-       # Execute chunk analysis
-       def execute_chunk_analysis(chunk, analysis_type, options = {})
-         start_time = Time.now
-
-         results = {
-           chunk_id: chunk[:id],
-           analysis_type: analysis_type,
-           start_time: start_time,
-           status: "running"
-         }
-
-         begin
-           # Perform analysis based on chunk type
-           case chunk[:strategy]
-           when "time_based"
-             results[:data] = analyze_time_chunk(chunk, analysis_type, options)
-           when "commit_count"
-             results[:data] = analyze_commit_chunk(chunk, analysis_type, options)
-           when "size_based"
-             results[:data] = analyze_size_chunk(chunk, analysis_type, options)
-           when "feature_based"
-             results[:data] = analyze_feature_chunk(chunk, analysis_type, options)
-           end
-
-           results[:status] = "completed"
-           results[:end_time] = Time.now
-           results[:duration] = results[:end_time] - results[:start_time]
-         rescue => e
-           results[:status] = "failed"
-           results[:error] = e.message
-           results[:end_time] = Time.now
-           results[:duration] = results[:end_time] - results[:start_time]
-         end
-
-         results
-       end
-
-       # Merge chunk analysis results
-       def merge_chunk_results(chunk_results, options = {})
-         merged = {
-           total_chunks: chunk_results.length,
-           successful_chunks: chunk_results.count { |r| r[:status] == "completed" },
-           failed_chunks: chunk_results.count { |r| r[:status] == "failed" },
-           total_duration: chunk_results.sum { |r| r[:duration] || 0 },
-           merged_data: {},
-           errors: []
-         }
-
-         # Collect errors
-         chunk_results.each do |result|
-           next unless result[:status] == "failed"
-
-           merged[:errors] << {
-             chunk_id: result[:chunk_id],
-             error: result[:error]
-           }
-         end
-
-         # Merge successful results
-         successful_results = chunk_results.select { |r| r[:status] == "completed" }
-
-         successful_results.each do |result|
-           merge_chunk_data(merged[:merged_data], result[:data])
-         end
-
-         merged
-       end
-
-       # Get chunk statistics
-       def get_chunk_statistics(chunks)
-         return {} if chunks.empty?
-
-         stats = {
-           total_chunks: chunks.length,
-           strategies: chunks.map { |c| c[:strategy] }.tally,
-           total_files: chunks.sum { |c| c[:files]&.length || 0 },
-           total_commits: chunks.sum { |c| c[:commits]&.length || 0 },
-           average_chunk_size: calculate_average_chunk_size(chunks),
-           chunk_distribution: analyze_chunk_distribution(chunks)
-         }
-
-         # Strategy-specific statistics
-         strategies = chunks.map { |c| c[:strategy] }.uniq
-         strategies.each do |strategy|
-           strategy_chunks = chunks.select { |c| c[:strategy] == strategy }
-           stats["#{strategy}_stats"] = get_strategy_statistics(strategy_chunks, strategy)
-         end
-
-         stats
-       end
-
-       private
-
-       def load_chunk_config
-         config_file = File.join(@project_dir, ".aidp-chunk-config.yml")
-
-         if File.exist?(config_file)
-           YAML.load_file(config_file) || DEFAULT_CHUNK_CONFIG
-         else
-           DEFAULT_CHUNK_CONFIG
-         end
-       end
-
-       def parse_time_duration(duration_str)
-         # Parse duration strings like "30d", "7d", "1w", etc.
-         # Use anchored patterns with limited digit repetition to prevent ReDoS
-         case duration_str.to_s.strip
-         when /\A(\d{1,6})d\z/
-           ::Regexp.last_match(1).to_i * 24 * 60 * 60
-         when /\A(\d{1,6})w\z/
-           ::Regexp.last_match(1).to_i * 7 * 24 * 60 * 60
-         when /\A(\d{1,6})m\z/
-           ::Regexp.last_match(1).to_i * 30 * 24 * 60 * 60
-         when /\A(\d{1,6})y\z/
-           ::Regexp.last_match(1).to_i * 365 * 24 * 60 * 60
-         else
-           30 * 24 * 60 * 60 # Default to 30 days
-         end
-       end
-
-       def parse_size(size_str)
-         # Parse size strings like "100MB", "1GB", etc.
-         # Use anchored patterns with limited digit repetition to prevent ReDoS
-         case size_str.to_s.strip
-         when /\A(\d{1,10})KB\z/i
-           ::Regexp.last_match(1).to_i * 1024
-         when /\A(\d{1,10})MB\z/i
-           ::Regexp.last_match(1).to_i * 1024 * 1024
-         when /\A(\d{1,10})GB\z/i
-           ::Regexp.last_match(1).to_i * 1024 * 1024 * 1024
-         else
-           100 * 1024 * 1024 # Default to 100MB
-         end
-       end
-
-       def get_repository_time_range
-         # Get the time range of the repository
-         # This would use git commands to get the first and last commit dates
-         {
-           start: Time.now - (365 * 24 * 60 * 60), # 1 year ago
-           end: Time.now
-         }
-       end
-
-       def get_commits_in_time_range(start_time, end_time)
-         # Get commits within the specified time range
-         # This would use git log with date filtering
-         []
-       end
-
-       def get_files_in_time_range(start_time, end_time)
-         # Get files modified within the specified time range
-         # This would use git log --name-only with date filtering
-         []
-       end
-
-       def get_all_commits
-         # Get all commits in the repository
-         # This would use git log
-         []
-       end
-
-       def get_files_for_commits(commits)
-         # Get files modified in the specified commits
-         # This would use git show --name-only
-         []
-       end
-
-       def analyze_repository_structure
-         # Analyze the repository structure to get file sizes and organization
-         structure = []
-
-         Dir.glob(File.join(@project_dir, "**", "*")).each do |path|
-           next unless File.file?(path)
-
-           relative_path = path.sub(@project_dir + "/", "")
-           structure << {
-             path: relative_path,
-             size: File.size(path),
-             type: File.extname(path)
-           }
-         end
-
-         structure
-       end
-
-       def identify_features
-         # Identify features/components in the repository
-         features = []
-
-         # Look for common feature patterns
-         feature_patterns = [
-           "app/features/**/*",
-           "features/**/*",
-           "src/features/**/*",
-           "lib/features/**/*"
-         ]
-
-         feature_patterns.each do |pattern|
-           Dir.glob(File.join(@project_dir, pattern)).each do |path|
-             next unless Dir.exist?(path)
-
-             feature_name = File.basename(path)
-             features << {
-               name: feature_name,
-               path: path.sub(@project_dir + "/", ""),
-               type: "directory"
-             }
-           end
-         end
-
-         features
-       end
-
-       def get_feature_files(feature)
-         # Get files associated with a feature
-         feature_path = File.join(@project_dir, feature[:path])
-         return [] unless Dir.exist?(feature_path)
-
-         files = []
-         Dir.glob(File.join(feature_path, "**", "*")).each do |path|
-           next unless File.file?(path)
-
-           files << path.sub(@project_dir + "/", "")
-         end
-
-         files
-       end
-
-       def get_feature_commits(feature)
-         # Get commits related to a feature
-         # This would use git log with path filtering
-         []
-       end
-
-       def split_large_feature(feature, files, commits, max_files, max_commits)
-         # Split a large feature into smaller chunks
-         chunks = []
-
-         # Split by files
-         files.each_slice(max_files) do |file_chunk|
-           chunks << {
-             id: generate_chunk_id("feature", "#{feature[:name]}_files_#{chunks.length}"),
-             strategy: "feature_based",
-             feature: feature,
-             files: file_chunk,
-             commits: commits,
-             file_count: file_chunk.length,
-             commit_count: commits.length
-           }
-         end
-
-         chunks
-       end
-
-       def generate_chunk_id(prefix, identifier)
-         timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
-         "#{prefix}_#{identifier}_#{timestamp}"
-       end
-
-       def estimate_chunk_analysis_duration(chunk, analysis_type)
-         # Estimate analysis duration based on chunk size and analysis type
-         base_duration = case analysis_type
-         when "static_analysis"
-           30 # seconds per file
-         when "security_analysis"
-           60 # seconds per file
-         when "performance_analysis"
-           45 # seconds per file
-         else
-           30 # seconds per file
-         end
-
-         file_count = chunk[:files]&.length || 0
-         commit_count = chunk[:commits]&.length || 0
-
-         (file_count + commit_count) * base_duration
-       end
-
-       def get_chunk_dependencies(chunk, all_chunks)
-         # Get dependencies between chunks
-         []
-
-         # This would analyze relationships between chunks
-         # For now, return empty array
-       end
-
-       def calculate_chunk_priority(chunk, analysis_type)
-         # Calculate priority for chunk analysis
-         priority = 0
-
-         # Higher priority for larger chunks
-         priority += chunk[:files]&.length || 0
-         priority += chunk[:commits]&.length || 0
-
-         # Higher priority for certain analysis types
-         case analysis_type
-         when "security_analysis"
-           priority *= 2
-         when "performance_analysis"
-           priority *= 1.5
-         end
-
-         priority
-       end
-
-       def estimate_chunk_resources(chunk, analysis_type)
-         # Estimate resource requirements for chunk analysis
-         {
-           memory: estimate_memory_usage(chunk),
-           cpu: estimate_cpu_usage(chunk, analysis_type),
-           disk: estimate_disk_usage(chunk)
-         }
-       end
-
-       def estimate_memory_usage(chunk)
-         # Estimate memory usage based on chunk size
-         file_count = chunk[:files]&.length || 0
-         commit_count = chunk[:commits]&.length || 0
-
-         (file_count + commit_count) * 1024 * 1024 # 1MB per item
-       end
-
-       def estimate_cpu_usage(chunk, analysis_type)
-         # Estimate CPU usage based on analysis type
-         case analysis_type
-         when "static_analysis"
-           "medium"
-         when "security_analysis"
-           "high"
-         when "performance_analysis"
-           "high"
-         else
-           "low"
-         end
-       end
-
-       def estimate_disk_usage(chunk)
-         # Estimate disk usage for temporary files
-         file_count = chunk[:files]&.length || 0
-         file_count * 1024 * 1024 # 1MB per file for temporary storage
-       end
-
-       def analyze_time_chunk(chunk, analysis_type, options)
-         # Analyze a time-based chunk
-         {
-           time_range: {
-             start: chunk[:start_time],
-             end: chunk[:end_time]
-           },
-           commits: chunk[:commits],
-           files: chunk[:files],
-           analysis_results: {}
-         }
-       end
-
-       def analyze_commit_chunk(chunk, analysis_type, options)
-         # Analyze a commit-based chunk
-         {
-           commit_range: {
-             start: chunk[:start_index],
-             end: chunk[:end_index]
-           },
-           commits: chunk[:commits],
-           files: chunk[:files],
-           analysis_results: {}
-         }
-       end
-
-       def analyze_size_chunk(chunk, analysis_type, options)
-         # Analyze a size-based chunk
-         {
-           size: chunk[:size],
-           files: chunk[:files],
-           directories: chunk[:directories],
-           analysis_results: {}
-         }
-       end
-
-       def analyze_feature_chunk(chunk, analysis_type, options)
-         # Analyze a feature-based chunk
-         {
-           feature: chunk[:feature],
-           files: chunk[:files],
-           commits: chunk[:commits],
-           analysis_results: {}
-         }
-       end
-
-       def merge_chunk_data(merged_data, chunk_data)
-         # Merge data from a chunk into the merged results
-         chunk_data.each do |key, value|
-           if merged_data[key].is_a?(Array) && value.is_a?(Array)
-             merged_data[key].concat(value)
-           elsif merged_data[key].is_a?(Hash) && value.is_a?(Hash)
-             merged_data[key].merge!(value)
-           else
-             merged_data[key] = value
-           end
-         end
-       end
-
-       def calculate_average_chunk_size(chunks)
-         return 0 if chunks.empty?
-
-         total_size = chunks.sum do |chunk|
-           (chunk[:files]&.length || 0) + (chunk[:commits]&.length || 0)
-         end
-
-         total_size.to_f / chunks.length
-       end
-
-       def analyze_chunk_distribution(chunks)
-         # Analyze the distribution of chunk sizes
-         sizes = chunks.map do |chunk|
-           (chunk[:files]&.length || 0) + (chunk[:commits]&.length || 0)
-         end
-
-         {
-           min: sizes.min,
-           max: sizes.max,
-           average: sizes.sum.to_f / sizes.length,
-           median: calculate_median(sizes)
-         }
-       end
-
-       def calculate_median(values)
-         sorted = values.sort
-         length = sorted.length
-
-         if length.odd?
-           sorted[length / 2]
-         else
-           (sorted[length / 2 - 1] + sorted[length / 2]) / 2.0
-         end
-       end
-
-       def get_strategy_statistics(chunks, strategy)
-         # Get statistics for a specific chunking strategy
-         {
-           chunk_count: chunks.length,
-           total_files: chunks.sum { |c| c[:files]&.length || 0 },
-           total_commits: chunks.sum { |c| c[:commits]&.length || 0 },
-           average_size: calculate_average_chunk_size(chunks)
-         }
-       end
-     end
-   end
- end
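
Note: the deletion above removes the 0.5.0-era RepositoryChunker along with the rest of the data/lib/aidp/analyze/* heavy-analysis modules listed in entries 91-113; nothing in the 0.8.0 file list reintroduces this API under the same name. For reference, here is a minimal sketch of how the removed class was driven end to end (chunk, plan, execute, merge). It is illustrative only: the require path is an assumption based on the deleted file's location, and since the git helpers in the class are stubs that return empty lists, the "size_based" strategy (which reads the filesystem directly) is the one most likely to produce non-empty chunks.

# Illustrative sketch against the removed 0.5.0-era API (not part of 0.8.0).
require "aidp/analyze/repository_chunker"  # assumed require path

chunker = Aidp::Analyze::RepositoryChunker.new(Dir.pwd)

# "size_based" chunks from on-disk file sizes; the other strategies are
# "time_based", "commit_count" and "feature_based".
result = chunker.chunk_repository("size_based")
chunks = result.is_a?(Hash) ? result[:chunks] : []  # [] when the repo is empty

# Build a prioritized plan, analyze each chunk, then merge the per-chunk results.
plan = chunker.get_chunk_analysis_plan(chunks, "static_analysis")
chunk_results = plan[:chunks].map do |chunk_plan|
  chunk = chunks.find { |c| c[:id] == chunk_plan[:chunk_id] }
  chunker.execute_chunk_analysis(chunk, "static_analysis")
end

merged = chunker.merge_chunk_results(chunk_results)
puts "#{merged[:successful_chunks]}/#{merged[:total_chunks]} chunks completed"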