gitingest 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/index.html +20 -2
- data/lib/gitingest/generator.rb +156 -179
- data/lib/gitingest/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 016473eb12d22b0847b8a5f45ed24b57a6211cb7a826662a5c29f66f9160033b
|
4
|
+
data.tar.gz: abd39a8c8416a9ba60627bda71e0e0c265df3fd46e12fdf4f893c40d627783a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b7f54743d80d51cb347ff13f4b5fe1cd892143df4867da989bc51b972d094c3bba0068ddd571420440c4a6fa1f7591ebdbb61ddc37f140553cea869f592c3bc
|
7
|
+
data.tar.gz: d1797eef97f58f9119d474e6fdd18be3b1670b6c3bbbad35866d42f0a8f0e7066f886f7ac6ddc1da0cf7d9bd552f4f962cbc5e9b0360d04e3952c102f09877ce
|
data/CHANGELOG.md
CHANGED
data/index.html
CHANGED
@@ -716,7 +716,7 @@
|
|
716
716
|
<div class="header-container">
|
717
717
|
<div class="logo">
|
718
718
|
<div class="logo-icon">G</div>
|
719
|
-
<div class="logo-text">Gitingest <span class="version-tag">v0.6.0</span></div>
|
719
|
+
<div class="logo-text">Gitingest <span class="version-tag">v0.6.1</span></div>
|
720
720
|
</div>
|
721
721
|
<nav>
|
722
722
|
<ul>
|
@@ -765,7 +765,7 @@
|
|
765
765
|
<span class="terminal-command">gem install gitingest</span>
|
766
766
|
</div>
|
767
767
|
<div class="terminal-output">
|
768
|
-
Successfully installed gitingest-0.
|
768
|
+
Successfully installed gitingest-0.6.1<br />
|
769
769
|
1 gem installed
|
770
770
|
</div>
|
771
771
|
<div class="terminal-line">
|
@@ -902,6 +902,24 @@ gitingest --repository user/repo --show-structure</code></pre>
|
|
902
902
|
<div class="container">
|
903
903
|
<h2>Changelog</h2>
|
904
904
|
<div class="timeline">
|
905
|
+
<div class="timeline-item">
|
906
|
+
<div class="timeline-date">
|
907
|
+
<span class="timeline-month">Mar</span>
|
908
|
+
<span class="timeline-day">26</span>
|
909
|
+
</div>
|
910
|
+
<div class="timeline-content">
|
911
|
+
<h3 class="timeline-version">v0.6.1</h3>
|
912
|
+
<p class="timeline-desc">Fixed error in exclude patterns functionality when using glob patterns.
|
913
|
+
</p>
|
914
|
+
<div class="timeline-list">
|
915
|
+
<ul>
|
916
|
+
<li>Fixed error "target of repeat operator is not specified" when using
|
917
|
+
<code>--exclude</code> with glob patterns like <code>*.md</code>
|
918
|
+
</li>
|
919
|
+
</ul>
|
920
|
+
</div>
|
921
|
+
</div>
|
922
|
+
</div>
|
905
923
|
<div class="timeline-item">
|
906
924
|
<div class="timeline-date">
|
907
925
|
<span class="timeline-month">Mar</span>
|
data/lib/gitingest/generator.rb
CHANGED
@@ -56,7 +56,7 @@ module Gitingest
|
|
56
56
|
".*\.o$", ".*\.obj$", ".*\.dll$", ".*\.dylib$", ".*\.exe$",
|
57
57
|
".*\.lib$", ".*\.out$", ".*\.a$", ".*\.pdb$", ".*\.nupkg$",
|
58
58
|
|
59
|
-
# Language
|
59
|
+
# Language-specific files
|
60
60
|
".*\.min\.js$", ".*\.min\.css$", ".*\.map$", ".*\.tfstate.*",
|
61
61
|
".*\.gem$", ".*\.ruby-version", ".*\.ruby-gemset", ".*\.rvmrc",
|
62
62
|
".*\.rs\.bk$", ".*\.gradle", ".*\.suo", ".*\.user", ".*\.userosscache",
|
@@ -65,38 +65,24 @@ module Gitingest
|
|
65
65
|
"\.swiftpm/", "\.build/"
|
66
66
|
].freeze
|
67
67
|
|
68
|
-
#
|
68
|
+
# Pattern for dot files/directories
|
69
69
|
DOT_FILE_PATTERN = %r{(?-mix:(^\.|/\.))}
|
70
70
|
|
71
71
|
# Maximum number of files to process to prevent memory overload
|
72
72
|
MAX_FILES = 1000
|
73
73
|
|
74
|
-
#
|
74
|
+
# Buffer size to reduce I/O operations
|
75
75
|
BUFFER_SIZE = 250
|
76
76
|
|
77
|
-
#
|
77
|
+
# Thread-local buffer threshold
|
78
78
|
LOCAL_BUFFER_THRESHOLD = 50
|
79
79
|
|
80
|
-
#
|
80
|
+
# Default threading options
|
81
81
|
DEFAULT_THREAD_COUNT = [Concurrent.processor_count, 8].min
|
82
82
|
DEFAULT_THREAD_TIMEOUT = 60 # seconds
|
83
83
|
|
84
84
|
attr_reader :options, :client, :repo_files, :excluded_patterns, :logger
|
85
85
|
|
86
|
-
# Initialize a new Generator with the given options
|
87
|
-
#
|
88
|
-
# @param options [Hash] Configuration options
|
89
|
-
# @option options [String] :repository GitHub repository in format "username/repo"
|
90
|
-
# @option options [String] :token GitHub personal access token
|
91
|
-
# @option options [String] :branch Repository branch (default: "main")
|
92
|
-
# @option options [String] :output_file Output file path
|
93
|
-
# @option options [Array<String>] :exclude Additional patterns to exclude
|
94
|
-
# @option options [Boolean] :quiet Reduce logging to errors only
|
95
|
-
# @option options [Boolean] :verbose Increase logging verbosity
|
96
|
-
# @option options [Logger] :logger Custom logger instance
|
97
|
-
# @option options [Integer] :threads Number of threads to use (default: auto-detected)
|
98
|
-
# @option options [Integer] :thread_timeout Seconds to wait for thread pool shutdown (default: 60)
|
99
|
-
# @option options [Boolean] :show_structure Show repository directory structure (default: false)
|
100
86
|
def initialize(options = {})
|
101
87
|
@options = options
|
102
88
|
@repo_files = []
|
@@ -107,68 +93,46 @@ module Gitingest
|
|
107
93
|
compile_excluded_patterns
|
108
94
|
end
|
109
95
|
|
110
|
-
# Main execution method for command line
|
111
96
|
def run
|
112
97
|
fetch_repository_contents
|
113
|
-
|
114
98
|
if @options[:show_structure]
|
115
99
|
puts generate_directory_structure
|
116
100
|
return
|
117
101
|
end
|
118
|
-
|
119
102
|
generate_file
|
120
103
|
end
|
121
104
|
|
122
|
-
# Generate content and save it to a file
|
123
|
-
#
|
124
|
-
# @return [String] Path to the generated file
|
125
105
|
def generate_file
|
126
106
|
fetch_repository_contents if @repo_files.empty?
|
127
|
-
|
128
107
|
@logger.info "Generating file for #{@options[:repository]}"
|
129
108
|
File.open(@options[:output_file], "w") do |file|
|
130
109
|
process_content_to_output(file)
|
131
110
|
end
|
132
|
-
|
133
111
|
@logger.info "Prompt generated and saved to #{@options[:output_file]}"
|
134
112
|
@options[:output_file]
|
135
113
|
end
|
136
114
|
|
137
|
-
# Generate content and return it as a string
|
138
|
-
# Useful for programmatic usage
|
139
|
-
#
|
140
|
-
# @return [String] The generated repository content
|
141
115
|
def generate_prompt
|
142
116
|
@logger.info "Generating in-memory prompt for #{@options[:repository]}"
|
143
|
-
|
144
117
|
fetch_repository_contents if @repo_files.empty?
|
145
|
-
|
146
118
|
content = StringIO.new
|
147
119
|
process_content_to_output(content)
|
148
|
-
|
149
120
|
result = content.string
|
150
121
|
@logger.info "Generated #{result.size} bytes of content in memory"
|
151
122
|
result
|
152
123
|
end
|
153
124
|
|
154
|
-
# Generate a textual representation of the repository's directory structure
|
155
|
-
#
|
156
|
-
# @return [String] The directory structure as a formatted string
|
157
125
|
def generate_directory_structure
|
158
126
|
fetch_repository_contents if @repo_files.empty?
|
159
|
-
|
160
127
|
@logger.info "Generating directory structure for #{@options[:repository]}"
|
161
|
-
|
162
128
|
repo_name = @options[:repository].split("/").last
|
163
129
|
structure = DirectoryStructureBuilder.new(repo_name, @repo_files).build
|
164
|
-
|
165
130
|
@logger.info "\n"
|
166
131
|
structure
|
167
132
|
end
|
168
133
|
|
169
134
|
private
|
170
135
|
|
171
|
-
# Set up logging based on verbosity options
|
172
136
|
def setup_logger
|
173
137
|
@logger = @options[:logger] || Logger.new($stdout)
|
174
138
|
@logger.level = if @options[:quiet]
|
@@ -178,11 +142,9 @@ module Gitingest
|
|
178
142
|
else
|
179
143
|
Logger::INFO
|
180
144
|
end
|
181
|
-
# Simplify logger format for command line usage
|
182
145
|
@logger.formatter = proc { |severity, _, _, msg| "#{severity == "INFO" ? "" : "[#{severity}] "}#{msg}\n" }
|
183
146
|
end
|
184
147
|
|
185
|
-
# Validate and set default options
|
186
148
|
def validate_options
|
187
149
|
raise ArgumentError, "Repository is required" unless @options[:repository]
|
188
150
|
|
@@ -195,10 +157,8 @@ module Gitingest
|
|
195
157
|
@excluded_patterns = DEFAULT_EXCLUDES + @options[:exclude]
|
196
158
|
end
|
197
159
|
|
198
|
-
# Configure the GitHub API client
|
199
160
|
def configure_client
|
200
161
|
@client = @options[:token] ? Octokit::Client.new(access_token: @options[:token]) : Octokit::Client.new
|
201
|
-
|
202
162
|
if @options[:token]
|
203
163
|
@logger.info "Using provided GitHub token for authentication"
|
204
164
|
else
|
@@ -207,73 +167,152 @@ module Gitingest
|
|
207
167
|
end
|
208
168
|
end
|
209
169
|
|
210
|
-
# Optimization: Create a combined regex for faster exclusion checking
|
211
170
|
def compile_excluded_patterns
|
212
|
-
|
213
|
-
@
|
171
|
+
@default_patterns = DEFAULT_EXCLUDES.map { |pattern| Regexp.new(pattern) }
|
172
|
+
@custom_patterns = []
|
173
|
+
@glob_patterns_with_char_classes = []
|
174
|
+
|
175
|
+
@options[:exclude].each do |glob_pattern|
|
176
|
+
if glob_pattern.include?("[") && glob_pattern.include?("]")
|
177
|
+
@glob_patterns_with_char_classes << glob_pattern
|
178
|
+
else
|
179
|
+
@custom_patterns << Regexp.new(glob_to_regex(glob_pattern))
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def glob_to_regex(pattern)
|
185
|
+
result = "^"
|
186
|
+
in_brackets = false
|
187
|
+
pattern.each_char do |c|
|
188
|
+
case c
|
189
|
+
when "[" then in_brackets = true
|
190
|
+
result += c
|
191
|
+
when "]" then in_brackets = false
|
192
|
+
result += c
|
193
|
+
when "*" then result += in_brackets ? "*" : ".*"
|
194
|
+
when ".", "\\", "+", "?", "|", "{", "}", "(", ")", "^", "$" then result += in_brackets ? c : "\\#{c}"
|
195
|
+
else result += c
|
196
|
+
end
|
197
|
+
end
|
198
|
+
"#{result}$"
|
214
199
|
end
|
215
200
|
|
216
|
-
# Fetch repository contents and apply exclusion filters
|
217
201
|
def fetch_repository_contents
|
218
202
|
@logger.info "Fetching repository: #{@options[:repository]} (branch: #{@options[:branch]})"
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
@logger.warn "Warning: Found #{@repo_files.size} files, limited to #{MAX_FILES}."
|
226
|
-
@repo_files = @repo_files.first(MAX_FILES)
|
227
|
-
end
|
228
|
-
@logger.info "Found #{@repo_files.size} files after exclusion filters"
|
229
|
-
rescue Octokit::Unauthorized
|
230
|
-
raise "Authentication error: Invalid or expired GitHub token. Please provide a valid token."
|
231
|
-
rescue Octokit::NotFound
|
232
|
-
raise "Repository not found: '#{@options[:repository]}' or branch '#{@options[:branch]}' doesn't exist or is private."
|
233
|
-
rescue Octokit::Error => e
|
234
|
-
raise "Error accessing repository: #{e.message}"
|
203
|
+
validate_repository_access
|
204
|
+
repo_tree = @client.tree(@options[:repository], @options[:branch], recursive: true)
|
205
|
+
@repo_files = repo_tree.tree.select { |item| item.type == "blob" && !excluded_file?(item.path) }
|
206
|
+
if @repo_files.size > MAX_FILES
|
207
|
+
@logger.warn "Warning: Found #{@repo_files.size} files, limited to #{MAX_FILES}."
|
208
|
+
@repo_files = @repo_files.first(MAX_FILES)
|
235
209
|
end
|
210
|
+
@logger.info "Found #{@repo_files.size} files after exclusion filters"
|
211
|
+
rescue Octokit::Unauthorized
|
212
|
+
raise "Authentication error: Invalid or expired GitHub token."
|
213
|
+
rescue Octokit::NotFound
|
214
|
+
raise "Repository not found: '#{@options[:repository]}' or branch '#{@options[:branch]}' doesn't exist or is private."
|
215
|
+
rescue Octokit::Error => e
|
216
|
+
raise "Error accessing repository: #{e.message}"
|
236
217
|
end
|
237
218
|
|
238
219
|
# Validate repository and branch access
|
239
220
|
def validate_repository_access
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
rescue Octokit::NotFound
|
246
|
-
raise "Repository '#{@options[:repository]}' not found or is private. Check the repository name or provide a valid token."
|
247
|
-
end
|
221
|
+
repo = @client.repository(@options[:repository])
|
222
|
+
@options[:branch] = repo.default_branch if @options[:branch] == :default
|
223
|
+
|
224
|
+
# If repository check succeeds, store this fact before trying branch
|
225
|
+
@repository_exists = true
|
248
226
|
|
249
227
|
begin
|
250
228
|
@client.branch(@options[:repository], @options[:branch])
|
251
229
|
rescue Octokit::NotFound
|
230
|
+
# If we got here, the repository exists but the branch doesn't
|
252
231
|
raise "Branch '#{@options[:branch]}' not found in repository '#{@options[:repository]}'"
|
253
232
|
end
|
233
|
+
rescue Octokit::Unauthorized
|
234
|
+
raise "Authentication error: Invalid or expired GitHub token"
|
235
|
+
rescue Octokit::NotFound
|
236
|
+
# Only reach this for repository not found (branch errors handled separately)
|
237
|
+
raise "Repository '#{@options[:repository]}' not found or is private. Check the repository name or provide a valid token."
|
254
238
|
end
|
255
239
|
|
256
|
-
# Optimization: Optimized file exclusion check with combined regex
|
257
240
|
def excluded_file?(path)
|
258
|
-
path.match?(
|
241
|
+
return true if path.match?(DOT_FILE_PATTERN)
|
242
|
+
return true if @default_patterns.any? { |pattern| path.match?(pattern) }
|
243
|
+
return true if @custom_patterns.any? { |pattern| path.match?(pattern) }
|
244
|
+
|
245
|
+
@glob_patterns_with_char_classes.any? { |glob_pattern| glob_match?(glob_pattern, path) }
|
246
|
+
end
|
247
|
+
|
248
|
+
def glob_match?(pattern, string)
|
249
|
+
return true if pattern == string
|
250
|
+
return false if !pattern.match?(/[*?\[]/) && pattern != string
|
251
|
+
|
252
|
+
pattern_idx = 0
|
253
|
+
string_idx = 0
|
254
|
+
|
255
|
+
while pattern_idx < pattern.length && string_idx < string.length
|
256
|
+
case pattern[pattern_idx]
|
257
|
+
when "*"
|
258
|
+
pattern_idx += 1 while pattern_idx + 1 < pattern.length && pattern[pattern_idx + 1] == "*"
|
259
|
+
return true if pattern_idx == pattern.length - 1
|
260
|
+
|
261
|
+
next_char = pattern[pattern_idx + 1]
|
262
|
+
pattern_idx += 1
|
263
|
+
while string_idx < string.length
|
264
|
+
break if string[string_idx] == next_char || next_char == "?" ||
|
265
|
+
(next_char == "[" && char_class_match?(pattern, pattern_idx, string[string_idx]))
|
266
|
+
|
267
|
+
string_idx += 1
|
268
|
+
end
|
269
|
+
when "?" then string_idx += 1
|
270
|
+
pattern_idx += 1
|
271
|
+
when "["
|
272
|
+
return false unless char_class_match?(pattern, pattern_idx, string[string_idx])
|
273
|
+
|
274
|
+
pattern_idx += 1
|
275
|
+
pattern_idx += 1 while pattern_idx < pattern.length && pattern[pattern_idx] != "]"
|
276
|
+
pattern_idx += 1
|
277
|
+
string_idx += 1
|
278
|
+
when string[string_idx] then string_idx += 1
|
279
|
+
pattern_idx += 1
|
280
|
+
else return false
|
281
|
+
end
|
282
|
+
end
|
283
|
+
|
284
|
+
pattern_idx += 1 while pattern_idx < pattern.length && pattern[pattern_idx] == "*"
|
285
|
+
pattern_idx == pattern.length && string_idx == string.length
|
286
|
+
end
|
287
|
+
|
288
|
+
def char_class_match?(pattern, class_start_idx, char)
|
289
|
+
idx = class_start_idx + 1
|
290
|
+
match = false
|
291
|
+
negate = pattern[idx] == "^" && (idx += 1)
|
292
|
+
|
293
|
+
while idx < pattern.length && pattern[idx] != "]"
|
294
|
+
if idx + 2 < pattern.length && pattern[idx + 1] == "-"
|
295
|
+
range_start = pattern[idx]
|
296
|
+
range_end = pattern[idx + 2]
|
297
|
+
match = true if char >= range_start && char <= range_end
|
298
|
+
idx += 3
|
299
|
+
else
|
300
|
+
match = true if pattern[idx] == char
|
301
|
+
idx += 1
|
302
|
+
end
|
303
|
+
break if match
|
304
|
+
end
|
305
|
+
negate ? !match : match
|
259
306
|
end
|
260
307
|
|
261
|
-
# Common implementation for both file and string output
|
262
308
|
def process_content_to_output(output)
|
263
309
|
@logger.debug "Using thread pool with #{@options[:threads]} threads"
|
264
|
-
|
265
310
|
buffer = []
|
266
311
|
progress = ProgressIndicator.new(@repo_files.size, @logger)
|
267
|
-
|
268
|
-
# Thread-local buffers to reduce mutex contention
|
269
312
|
thread_buffers = {}
|
270
313
|
mutex = Mutex.new
|
271
314
|
errors = []
|
272
|
-
|
273
|
-
# Thread pool based on configuration
|
274
315
|
pool = Concurrent::FixedThreadPool.new(@options[:threads])
|
275
|
-
|
276
|
-
# Group files by priority
|
277
316
|
prioritized_files = prioritize_files(@repo_files)
|
278
317
|
|
279
318
|
prioritized_files.each_with_index do |repo_file, index|
|
@@ -281,12 +320,9 @@ module Gitingest
|
|
281
320
|
thread_id = Thread.current.object_id
|
282
321
|
thread_buffers[thread_id] ||= []
|
283
322
|
local_buffer = thread_buffers[thread_id]
|
284
|
-
|
285
323
|
begin
|
286
324
|
content = fetch_file_content_with_retry(repo_file.path)
|
287
|
-
|
288
|
-
local_buffer << result
|
289
|
-
|
325
|
+
local_buffer << format_file_content(repo_file.path, content)
|
290
326
|
if local_buffer.size >= LOCAL_BUFFER_THRESHOLD
|
291
327
|
mutex.synchronize do
|
292
328
|
buffer.concat(local_buffer)
|
@@ -294,39 +330,24 @@ module Gitingest
|
|
294
330
|
local_buffer.clear
|
295
331
|
end
|
296
332
|
end
|
297
|
-
|
298
333
|
progress.update(index + 1)
|
299
334
|
rescue Octokit::Error => e
|
300
|
-
mutex.synchronize
|
301
|
-
|
302
|
-
@logger.error "Error fetching #{repo_file.path}: #{e.message}"
|
303
|
-
end
|
335
|
+
mutex.synchronize { errors << "Error fetching #{repo_file.path}: #{e.message}" }
|
336
|
+
@logger.error "Error fetching #{repo_file.path}: #{e.message}"
|
304
337
|
rescue StandardError => e
|
305
|
-
mutex.synchronize
|
306
|
-
|
307
|
-
@logger.error "Unexpected error processing #{repo_file.path}: #{e.message}"
|
308
|
-
end
|
338
|
+
mutex.synchronize { errors << "Unexpected error processing #{repo_file.path}: #{e.message}" }
|
339
|
+
@logger.error "Unexpected error processing #{repo_file.path}: #{e.message}"
|
309
340
|
end
|
310
341
|
end
|
311
342
|
end
|
312
343
|
|
313
|
-
|
314
|
-
|
315
|
-
wait_success = pool.wait_for_termination(@options[:thread_timeout])
|
344
|
+
pool.shutdown
|
345
|
+
pool.wait_for_termination(@options[:thread_timeout]) || (@logger.warn "Thread pool timeout, forcing termination"
|
316
346
|
|
317
|
-
|
318
|
-
@logger.warn "Thread pool did not shut down within #{@options[:thread_timeout]} seconds, forcing termination"
|
319
|
-
pool.kill
|
320
|
-
end
|
321
|
-
rescue StandardError => e
|
322
|
-
@logger.error "Error during thread pool shutdown: #{e.message}"
|
323
|
-
end
|
347
|
+
pool.kill)
|
324
348
|
|
325
|
-
# Process remaining files in thread-local buffers
|
326
349
|
mutex.synchronize do
|
327
|
-
thread_buffers.each_value
|
328
|
-
buffer.concat(local_buffer) unless local_buffer.empty?
|
329
|
-
end
|
350
|
+
thread_buffers.each_value { |local_buffer| buffer.concat(local_buffer) unless local_buffer.empty? }
|
330
351
|
write_buffer(output, buffer) unless buffer.empty?
|
331
352
|
end
|
332
353
|
|
@@ -336,7 +357,6 @@ module Gitingest
|
|
336
357
|
@logger.debug "First few errors: #{errors.first(3).join(", ")}" if @logger.debug?
|
337
358
|
end
|
338
359
|
|
339
|
-
# Format a file's content for the prompt
|
340
360
|
def format_file_content(path, content)
|
341
361
|
<<~TEXT
|
342
362
|
================================================================
|
@@ -347,21 +367,18 @@ module Gitingest
|
|
347
367
|
TEXT
|
348
368
|
end
|
349
369
|
|
350
|
-
# Optimization: Fetch file content with exponential backoff for rate limiting
|
351
370
|
def fetch_file_content_with_retry(path, retries = 3, base_delay = 2)
|
352
371
|
content = @client.contents(@options[:repository], path: path, ref: @options[:branch])
|
353
372
|
Base64.decode64(content.content)
|
354
373
|
rescue Octokit::TooManyRequests
|
355
374
|
raise unless retries.positive?
|
356
375
|
|
357
|
-
# Optimization: Exponential backoff with jitter for better rate limit handling
|
358
376
|
delay = base_delay**(4 - retries) * (0.8 + 0.4 * rand)
|
359
377
|
@logger.warn "Rate limit exceeded, waiting #{delay.round(1)} seconds..."
|
360
378
|
sleep(delay)
|
361
379
|
fetch_file_content_with_retry(path, retries - 1, base_delay)
|
362
380
|
end
|
363
381
|
|
364
|
-
# Write buffer contents to file and clear buffer
|
365
382
|
def write_buffer(file, buffer)
|
366
383
|
return if buffer.empty?
|
367
384
|
|
@@ -369,26 +386,20 @@ module Gitingest
|
|
369
386
|
buffer.clear
|
370
387
|
end
|
371
388
|
|
372
|
-
# Sort files by estimated processing priority
|
373
389
|
def prioritize_files(files)
|
374
|
-
# Sort files by estimated size (based on extension)
|
375
|
-
# This helps with better thread distribution - process small files first
|
376
390
|
files.sort_by do |file|
|
377
391
|
path = file.path.downcase
|
378
|
-
if path.end_with?(".md", ".txt", ".json", ".yaml", ".yml")
|
379
|
-
|
380
|
-
elsif path.end_with?(".rb", ".py", ".js", ".ts", ".go", ".java", ".c", ".cpp", ".h")
|
381
|
-
1 # Then process code files (medium size)
|
392
|
+
if path.end_with?(".md", ".txt", ".json", ".yaml", ".yml") then 0
|
393
|
+
elsif path.end_with?(".rb", ".py", ".js", ".ts", ".go", ".java", ".c", ".cpp", ".h") then 1
|
382
394
|
else
|
383
|
-
2
|
395
|
+
2
|
384
396
|
end
|
385
397
|
end
|
386
398
|
end
|
387
399
|
end
|
388
400
|
|
389
|
-
# Helper class for showing progress in CLI with visual bar
|
390
401
|
class ProgressIndicator
|
391
|
-
BAR_WIDTH = 30
|
402
|
+
BAR_WIDTH = 30
|
392
403
|
|
393
404
|
def initialize(total, logger)
|
394
405
|
@total = total
|
@@ -396,77 +407,47 @@ module Gitingest
|
|
396
407
|
@last_percent = 0
|
397
408
|
@start_time = Time.now
|
398
409
|
@last_update_time = Time.now
|
399
|
-
@update_interval = 0.5
|
410
|
+
@update_interval = 0.5
|
400
411
|
end
|
401
412
|
|
402
|
-
# Update progress with visual bar
|
403
413
|
def update(current)
|
404
|
-
# Avoid updating too frequently
|
405
414
|
now = Time.now
|
406
415
|
return if now - @last_update_time < @update_interval && current != @total
|
407
416
|
|
408
417
|
@last_update_time = now
|
409
418
|
percent = (current.to_f / @total * 100).round
|
410
|
-
|
411
|
-
# Only update at meaningful increments or completion
|
412
419
|
return unless percent > @last_percent || current == @total
|
413
420
|
|
414
421
|
elapsed = now - @start_time
|
415
|
-
|
416
|
-
# Generate progress bar
|
417
422
|
progress_chars = (BAR_WIDTH * (current.to_f / @total)).round
|
418
423
|
bar = "[#{"|" * progress_chars}#{" " * (BAR_WIDTH - progress_chars)}]"
|
419
|
-
|
420
|
-
# Calculate ETA
|
421
|
-
eta_string = ""
|
422
|
-
if current > 1 && percent < 100
|
423
|
-
remaining = (elapsed / current) * (@total - current)
|
424
|
-
eta_string = " ETA: #{format_time(remaining)}"
|
425
|
-
end
|
426
|
-
|
427
|
-
# Calculate rate (files per second)
|
424
|
+
eta_string = current > 1 && percent < 100 ? " ETA: #{format_time((elapsed / current) * (@total - current))}" : ""
|
428
425
|
rate = begin
|
429
|
-
current / elapsed
|
426
|
+
(current / elapsed).round(1)
|
430
427
|
rescue StandardError
|
431
428
|
0
|
432
429
|
end
|
433
|
-
|
434
|
-
|
435
|
-
# Clear line and print progress bar
|
436
|
-
print "\r\e[K" # Clear the line
|
437
|
-
print "#{bar} #{percent}% | #{current}/#{@total} files#{rate_string}#{eta_string}"
|
438
|
-
print "\n" if current == @total # Add newline when complete
|
439
|
-
|
440
|
-
# Also log to logger at less frequent intervals
|
430
|
+
print "\r\e[K#{bar} #{percent}% | #{current}/#{@total} files (#{rate} files/sec)#{eta_string}"
|
431
|
+
print "\n" if current == @total
|
441
432
|
if (percent % 10).zero? && percent != @last_percent || current == @total
|
442
433
|
@logger.info "Processing: #{percent}% complete (#{current}/#{@total} files)#{eta_string}"
|
443
434
|
end
|
444
|
-
|
445
435
|
@last_percent = percent
|
446
436
|
end
|
447
437
|
|
448
438
|
private
|
449
439
|
|
450
|
-
# Format seconds into a human-readable time string
|
451
440
|
def format_time(seconds)
|
452
441
|
return "< 1s" if seconds < 1
|
453
442
|
|
454
443
|
case seconds
|
455
|
-
when 0...60
|
456
|
-
|
457
|
-
|
458
|
-
minutes = (seconds / 60).floor
|
459
|
-
secs = (seconds % 60).round
|
460
|
-
"#{minutes}m #{secs}s"
|
461
|
-
else
|
462
|
-
hours = (seconds / 3600).floor
|
463
|
-
minutes = ((seconds % 3600) / 60).floor
|
464
|
-
"#{hours}h #{minutes}m"
|
444
|
+
when 0...60 then "#{seconds.round}s"
|
445
|
+
when 60...3600 then "#{(seconds / 60).floor}m #{(seconds % 60).round}s"
|
446
|
+
else "#{(seconds / 3600).floor}h #{((seconds % 3600) / 60).floor}m"
|
465
447
|
end
|
466
448
|
end
|
467
449
|
end
|
468
450
|
|
469
|
-
# Helper class to build directory structure visualization
|
470
451
|
class DirectoryStructureBuilder
|
471
452
|
def initialize(root_name, files)
|
472
453
|
@root_name = root_name
|
@@ -475,21 +456,17 @@ module Gitingest
|
|
475
456
|
|
476
457
|
def build
|
477
458
|
tree = { @root_name => {} }
|
478
|
-
|
479
459
|
@files.sort.each do |path|
|
480
460
|
parts = path.split("/")
|
481
461
|
current = tree[@root_name]
|
482
|
-
|
483
462
|
parts.each do |part|
|
484
|
-
if part == parts.last
|
485
|
-
current[part] = nil
|
463
|
+
if part == parts.last then current[part] = nil
|
486
464
|
else
|
487
465
|
current[part] ||= {}
|
488
466
|
current = current[part]
|
489
467
|
end
|
490
468
|
end
|
491
469
|
end
|
492
|
-
|
493
470
|
output = ["Directory structure:"]
|
494
471
|
render_tree(tree, "", output)
|
495
472
|
output.join("\n")
|
@@ -502,18 +479,18 @@ module Gitingest
|
|
502
479
|
|
503
480
|
tree.keys.each_with_index do |key, index|
|
504
481
|
is_last = index == tree.keys.size - 1
|
505
|
-
current_prefix = prefix
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
482
|
+
current_prefix = if prefix.empty?
|
483
|
+
" "
|
484
|
+
else
|
485
|
+
prefix + (is_last ? " " : "│ ")
|
486
|
+
end
|
487
|
+
connector = if prefix.empty?
|
488
|
+
"└── "
|
489
|
+
else
|
490
|
+
(is_last ? "└── " : "├── ")
|
491
|
+
end
|
492
|
+
item = tree[key].is_a?(Hash) ? "#{key}/" : key
|
493
|
+
output << "#{prefix}#{connector}#{item}"
|
517
494
|
render_tree(tree[key], current_prefix, output) if tree[key].is_a?(Hash)
|
518
495
|
end
|
519
496
|
end
|
data/lib/gitingest/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gitingest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Davide Santangelo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-03-
|
11
|
+
date: 2025-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: concurrent-ruby
|