gitingest 0.6.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -14
- data/index.html +39 -15
- data/lib/gitingest/generator.rb +51 -133
- data/lib/gitingest/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c5544ec1d956710315693490fddc60f26452452ca674739d9a343f0418b26c6
|
4
|
+
data.tar.gz: 6d9dd377e78788bf3f3b0a6511cc38cfc9a5e85bba787f905f879235d767dc8c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 96154003ef92d35503ddb9488ee25f147c0809f88226a3554d818e43b6692908fae94e00d56a525a8c31327f7be3b0323486bb79227dac5fe36709cd99f09a11
|
7
|
+
data.tar.gz: 25d6c8de3537f84f27feedc6fb3ec812e14ad3ebf1cbe708b3931e757741d00a69d7e43a86f08bb5198cf1532f92b44d64bdf9c53afa2ec802c7013cb54bf366
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [0.7.0] - 2025-06-04
|
4
|
+
|
5
|
+
### Changed
|
6
|
+
- Improved file exclusion logic for glob patterns to correctly match files at any directory depth (e.g., `*.md` now correctly matches `docs/file.md`).
|
7
|
+
- Refined internal handling of exclusion patterns for clarity and robustness, using `File.fnmatch` for all custom glob patterns.
|
8
|
+
- Enhanced debug logging for file exclusion to show the specific pattern that caused a match.
|
9
|
+
|
3
10
|
## [0.6.3] - 2025-04-14
|
4
11
|
|
5
12
|
### Fixed
|
@@ -16,8 +23,6 @@
|
|
16
23
|
### Fixed
|
17
24
|
- Fixed error "target of repeat operator is not specified" when using `--exclude` with glob patterns like `*.md`
|
18
25
|
|
19
|
-
---
|
20
|
-
|
21
26
|
## [0.6.0] - 2025-03-18
|
22
27
|
|
23
28
|
### Changed
|
@@ -26,8 +31,6 @@
|
|
26
31
|
- Updated documentation to reflect the correct default branch behavior
|
27
32
|
- Fixed issues with repository validation in tests
|
28
33
|
|
29
|
-
---
|
30
|
-
|
31
34
|
## [0.5.0] - 2025-03-10
|
32
35
|
|
33
36
|
### Added
|
@@ -40,8 +43,6 @@
|
|
40
43
|
- Enhanced documentation with directory structure visualization examples
|
41
44
|
- Updated CLI help with the new option
|
42
45
|
|
43
|
-
---
|
44
|
-
|
45
46
|
## [0.4.0] - 2025-03-03
|
46
47
|
|
47
48
|
### Added
|
@@ -60,8 +61,6 @@
|
|
60
61
|
- Fixed race conditions in progress indicator updates
|
61
62
|
- Addressed timing inconsistencies in multithreaded test scenarios
|
62
63
|
|
63
|
-
---
|
64
|
-
|
65
64
|
## [0.3.1] - 2025-03-03
|
66
65
|
|
67
66
|
### Added
|
@@ -82,8 +81,6 @@
|
|
82
81
|
- Ensured thread pool shutdown respects the configured timeout.
|
83
82
|
- Resolved potential race conditions in file content retrieval.
|
84
83
|
|
85
|
-
---
|
86
|
-
|
87
84
|
## [0.3.0] - 2025-03-02
|
88
85
|
|
89
86
|
### Added
|
@@ -96,8 +93,6 @@
|
|
96
93
|
- Optimized thread pool size calculation for improved performance.
|
97
94
|
- Improved error handling in concurrent operations.
|
98
95
|
|
99
|
-
---
|
100
|
-
|
101
96
|
## [0.2.0] - 2025-03-02
|
102
97
|
|
103
98
|
### Added
|
@@ -111,8 +106,6 @@
|
|
111
106
|
- Enforced a 1000-file limit to prevent memory overload.
|
112
107
|
- Updated version to `0.2.0`.
|
113
108
|
|
114
|
-
---
|
115
|
-
|
116
109
|
## [0.1.0] - 2025-03-02
|
117
110
|
|
118
111
|
### Added
|
data/index.html
CHANGED
@@ -716,7 +716,7 @@
|
|
716
716
|
<div class="header-container">
|
717
717
|
<div class="logo">
|
718
718
|
<div class="logo-icon">G</div>
|
719
|
-
<div class="logo-text">Gitingest <span class="version-tag">v0.
|
719
|
+
<div class="logo-text">Gitingest <span class="version-tag">v0.7.0</span></div>
|
720
720
|
</div>
|
721
721
|
<nav>
|
722
722
|
<ul>
|
@@ -765,7 +765,7 @@
|
|
765
765
|
<span class="terminal-command">gem install gitingest</span>
|
766
766
|
</div>
|
767
767
|
<div class="terminal-output">
|
768
|
-
Successfully installed gitingest-0.
|
768
|
+
Successfully installed gitingest-0.7.0<br />
|
769
769
|
1 gem installed
|
770
770
|
</div>
|
771
771
|
<div class="terminal-line">
|
@@ -902,10 +902,33 @@ gitingest --repository user/repo --show-structure</code></pre>
|
|
902
902
|
<div class="container">
|
903
903
|
<h2>Changelog</h2>
|
904
904
|
<div class="timeline">
|
905
|
+
<div class="timeline-item">
|
906
|
+
<div class="timeline-date">
|
907
|
+
<span class="timeline-month">Jun</span>
|
908
|
+
<span class="timeline-day">04</span>
|
909
|
+
</div>
|
910
|
+
<div class="timeline-content">
|
911
|
+
<h3 class="timeline-version">v0.7.0</h3>
|
912
|
+
<p class="timeline-desc">Improved file exclusion logic and debug logging.</p>
|
913
|
+
<div class="timeline-list">
|
914
|
+
<ul>
|
915
|
+
<li>Improved file exclusion logic for glob patterns to correctly match files at any
|
916
|
+
directory depth (e.g., <code>*.md</code> now correctly matches
|
917
|
+
<code>docs/file.md</code>).
|
918
|
+
</li>
|
919
|
+
<li>Refined internal handling of exclusion patterns for clarity and robustness, using
|
920
|
+
<code>File.fnmatch</code> for all custom glob patterns.
|
921
|
+
</li>
|
922
|
+
<li>Enhanced debug logging for file exclusion to show the specific pattern that caused a
|
923
|
+
match.</li>
|
924
|
+
</ul>
|
925
|
+
</div>
|
926
|
+
</div>
|
927
|
+
</div>
|
905
928
|
<div class="timeline-item">
|
906
929
|
<div class="timeline-date">
|
907
930
|
<span class="timeline-month">Apr</span>
|
908
|
-
<span class="timeline-day">
|
931
|
+
<span class="timeline-day">14</span>
|
909
932
|
</div>
|
910
933
|
<div class="timeline-content">
|
911
934
|
<h3 class="timeline-version">v0.6.3</h3>
|
@@ -976,7 +999,7 @@ gitingest --repository user/repo --show-structure</code></pre>
|
|
976
999
|
<div class="timeline-item">
|
977
1000
|
<div class="timeline-date">
|
978
1001
|
<span class="timeline-month">Mar</span>
|
979
|
-
<span class="timeline-day">
|
1002
|
+
<span class="timeline-day">10</span> <!-- Corrected day based on CHANGELOG.md -->
|
980
1003
|
</div>
|
981
1004
|
<div class="timeline-content">
|
982
1005
|
<h3 class="timeline-version">v0.5.0</h3>
|
@@ -994,7 +1017,7 @@ gitingest --repository user/repo --show-structure</code></pre>
|
|
994
1017
|
<div class="timeline-item">
|
995
1018
|
<div class="timeline-date">
|
996
1019
|
<span class="timeline-month">Mar</span>
|
997
|
-
<span class="timeline-day">
|
1020
|
+
<span class="timeline-day">03</span> <!-- Corrected day based on CHANGELOG.md -->
|
998
1021
|
</div>
|
999
1022
|
<div class="timeline-content">
|
1000
1023
|
<h3 class="timeline-version">v0.4.0</h3>
|
@@ -1014,16 +1037,17 @@ gitingest --repository user/repo --show-structure</code></pre>
|
|
1014
1037
|
<span class="timeline-month">Mar</span>
|
1015
1038
|
<span class="timeline-day">03</span>
|
1016
1039
|
</div>
|
1017
|
-
<div class="timeline-content"
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1040
|
+
<div class="timeline-content">
|
1041
|
+
<h3 class="timeline-version">v0.3.1</h3>
|
1042
|
+
<p class="timeline-desc">Optimized threading with configurable options and introduced
|
1043
|
+
exponential backoff for API requests.</p>
|
1044
|
+
<div class="timeline-list">
|
1045
|
+
<ul>
|
1046
|
+
<li>Configurable threading options</li>
|
1047
|
+
<li>Added thread-local buffers</li>
|
1048
|
+
<li>Improved file exclusion performance</li>
|
1049
|
+
</ul>
|
1050
|
+
</div>
|
1027
1051
|
</div>
|
1028
1052
|
</div>
|
1029
1053
|
</div>
|
data/lib/gitingest/generator.rb
CHANGED
@@ -86,10 +86,12 @@ module Gitingest
|
|
86
86
|
def initialize(options = {})
|
87
87
|
@options = options
|
88
88
|
@repo_files = []
|
89
|
-
@excluded_patterns = []
|
89
|
+
# @excluded_patterns is assigned below, after validate_options has defaulted @options[:exclude]
|
90
90
|
setup_logger
|
91
91
|
validate_options
|
92
92
|
configure_client
|
93
|
+
# Populate @excluded_patterns with raw patterns after options are validated
|
94
|
+
@excluded_patterns = DEFAULT_EXCLUDES + @options.fetch(:exclude, [])
|
93
95
|
compile_excluded_patterns
|
94
96
|
end
|
95
97
|
|
@@ -150,11 +152,11 @@ module Gitingest
|
|
150
152
|
|
151
153
|
@options[:output_file] ||= "#{@options[:repository].split("/").last}_prompt.txt"
|
152
154
|
@options[:branch] ||= :default
|
153
|
-
@options[:exclude] ||= []
|
155
|
+
@options[:exclude] ||= [] # Ensure :exclude is always an array
|
154
156
|
@options[:threads] ||= DEFAULT_THREAD_COUNT
|
155
157
|
@options[:thread_timeout] ||= DEFAULT_THREAD_TIMEOUT
|
156
158
|
@options[:show_structure] ||= false
|
157
|
-
@excluded_patterns
|
159
|
+
# NOTE: @excluded_patterns is assigned in initialize, after validate_options, as DEFAULT_EXCLUDES + @options[:exclude]
|
158
160
|
end
|
159
161
|
|
160
162
|
def configure_client
|
@@ -169,63 +171,25 @@ module Gitingest
|
|
169
171
|
|
170
172
|
def compile_excluded_patterns
|
171
173
|
@default_patterns = DEFAULT_EXCLUDES.map { |pattern| Regexp.new(pattern) }
|
172
|
-
@
|
173
|
-
@
|
174
|
+
@custom_glob_patterns = [] # For File.fnmatch
|
175
|
+
@directory_patterns = []
|
174
176
|
|
175
|
-
@options[:exclude].each do |
|
176
|
-
if
|
177
|
-
@
|
177
|
+
@options[:exclude].each do |pattern_str|
|
178
|
+
if pattern_str.end_with?("/")
|
179
|
+
@directory_patterns << pattern_str
|
178
180
|
else
|
179
|
-
|
181
|
+
# All other custom excludes are treated as glob patterns.
|
182
|
+
# If the pattern does not contain a slash, prepend "**/"
|
183
|
+
# to make it match at any depth (e.g., "*.md" becomes "**/*.md").
|
184
|
+
@custom_glob_patterns << if pattern_str.include?("/")
|
185
|
+
pattern_str
|
186
|
+
else
|
187
|
+
"**/#{pattern_str}"
|
188
|
+
end
|
180
189
|
end
|
181
190
|
end
|
182
191
|
end
|
183
192
|
|
184
|
-
# Builds a single regex from the combined default and custom exclusion patterns.
|
185
|
-
# Handles glob patterns and directory patterns (ending with /).
|
186
|
-
#
|
187
|
-
# @return [Regexp] The combined exclusion regex.
|
188
|
-
def build_exclusion_regex
|
189
|
-
combined_patterns = DEFAULT_EXCLUDES + @options.fetch(:exclude, [])
|
190
|
-
regex_parts = combined_patterns.map do |pattern|
|
191
|
-
if pattern.end_with?("/")
|
192
|
-
# Directory pattern: Match anything starting with this path
|
193
|
-
"^#{Regexp.escape(pattern)}"
|
194
|
-
else
|
195
|
-
# File or glob pattern: Convert glob to regex
|
196
|
-
glob_to_regex(pattern)
|
197
|
-
end
|
198
|
-
end
|
199
|
-
# Combine all parts, ensuring they match the full path or directory prefix
|
200
|
-
Regexp.new(regex_parts.join("|"))
|
201
|
-
end
|
202
|
-
|
203
|
-
# Converts a glob pattern to a Regexp string.
|
204
|
-
# Handles *, **, ?, and character classes.
|
205
|
-
# Ensures the pattern matches the entire string by default.
|
206
|
-
#
|
207
|
-
# @param glob [String] The glob pattern.
|
208
|
-
# @return [String] The regex pattern string.
|
209
|
-
def glob_to_regex(glob)
|
210
|
-
# More robust glob conversion
|
211
|
-
regex = glob.gsub(%r{/\*\*($|/)}, '/.*\\1') # Handle **/ and ** at end
|
212
|
-
.gsub("*", "[^/]*") # Match * within path segments
|
213
|
-
.gsub("?", "[^/]") # Match ? within path segments
|
214
|
-
.gsub(".", '\\.') # Escape dots
|
215
|
-
.gsub("{", "(") # Convert { to ( for grouping
|
216
|
-
.gsub("}", ")") # Convert } to ) for grouping
|
217
|
-
.gsub(",", "|") # Convert , to | for OR within groups
|
218
|
-
|
219
|
-
# Ensure the pattern matches the full path unless it was originally a directory pattern
|
220
|
-
# (which is handled separately now) or contains wildcards suggesting partial match.
|
221
|
-
# If it contains no wildcards, anchor it.
|
222
|
-
if glob.include?("*") || glob.include?("?") || glob.include?("{")
|
223
|
-
regex # Allow partial matching for wildcards
|
224
|
-
else
|
225
|
-
"^#{regex}$" # Anchor exact matches
|
226
|
-
end
|
227
|
-
end
|
228
|
-
|
229
193
|
def fetch_repository_contents
|
230
194
|
@logger.info "Fetching repository: #{@options[:repository]} (branch: #{@options[:branch]})"
|
231
195
|
validate_repository_access
|
@@ -269,87 +233,38 @@ module Gitingest
|
|
269
233
|
return true if path.match?(DOT_FILE_PATTERN)
|
270
234
|
|
271
235
|
# Check for directory exclusion patterns (ending with '/')
|
272
|
-
@
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
end
|
236
|
+
matched_dir_pattern = @directory_patterns.find { |dir_pattern| path.start_with?(dir_pattern) }
|
237
|
+
if matched_dir_pattern
|
238
|
+
@logger.debug { "Excluding #{path} (matched directory pattern: #{matched_dir_pattern})" }
|
239
|
+
return true
|
277
240
|
end
|
278
241
|
|
279
|
-
#
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
end
|
285
|
-
|
286
|
-
def glob_match?(pattern, string)
|
287
|
-
return true if pattern == string
|
288
|
-
return false if !pattern.match?(/[*?\[]/) && pattern != string
|
289
|
-
|
290
|
-
pattern_idx = 0
|
291
|
-
string_idx = 0
|
292
|
-
|
293
|
-
while pattern_idx < pattern.length && string_idx < string.length
|
294
|
-
case pattern[pattern_idx]
|
295
|
-
when "*"
|
296
|
-
pattern_idx += 1 while pattern_idx + 1 < pattern.length && pattern[pattern_idx + 1] == "*"
|
297
|
-
return true if pattern_idx == pattern.length - 1
|
298
|
-
|
299
|
-
next_char = pattern[pattern_idx + 1]
|
300
|
-
pattern_idx += 1
|
301
|
-
while string_idx < string.length
|
302
|
-
break if string[string_idx] == next_char || next_char == "?" ||
|
303
|
-
(next_char == "[" && char_class_match?(pattern, pattern_idx, string[string_idx]))
|
304
|
-
|
305
|
-
string_idx += 1
|
306
|
-
end
|
307
|
-
when "?" then string_idx += 1
|
308
|
-
pattern_idx += 1
|
309
|
-
when "["
|
310
|
-
return false unless char_class_match?(pattern, pattern_idx, string[string_idx])
|
311
|
-
|
312
|
-
pattern_idx += 1
|
313
|
-
pattern_idx += 1 while pattern_idx < pattern.length && pattern[pattern_idx] != "]"
|
314
|
-
pattern_idx += 1
|
315
|
-
string_idx += 1
|
316
|
-
when string[string_idx] then string_idx += 1
|
317
|
-
pattern_idx += 1
|
318
|
-
else return false
|
319
|
-
end
|
242
|
+
# Check default regex patterns
|
243
|
+
matched_default_pattern = @default_patterns.find { |pattern| path.match?(pattern) }
|
244
|
+
if matched_default_pattern
|
245
|
+
@logger.debug { "Excluding #{path} (matched default pattern: #{matched_default_pattern.source})" }
|
246
|
+
return true
|
320
247
|
end
|
321
248
|
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
def char_class_match?(pattern, class_start_idx, char)
|
327
|
-
idx = class_start_idx + 1
|
328
|
-
match = false
|
329
|
-
negate = pattern[idx] == "^" && (idx += 1)
|
330
|
-
|
331
|
-
while idx < pattern.length && pattern[idx] != "]"
|
332
|
-
if idx + 2 < pattern.length && pattern[idx + 1] == "-"
|
333
|
-
range_start = pattern[idx]
|
334
|
-
range_end = pattern[idx + 2]
|
335
|
-
match = true if char >= range_start && char <= range_end
|
336
|
-
idx += 3
|
337
|
-
else
|
338
|
-
match = true if pattern[idx] == char
|
339
|
-
idx += 1
|
340
|
-
end
|
341
|
-
break if match
|
249
|
+
# Check custom glob patterns using File.fnmatch
|
250
|
+
matched_glob_pattern = @custom_glob_patterns.find do |glob_pattern|
|
251
|
+
File.fnmatch(glob_pattern, path, File::FNM_PATHNAME | File::FNM_DOTMATCH)
|
342
252
|
end
|
343
|
-
|
253
|
+
if matched_glob_pattern
|
254
|
+
@logger.debug { "Excluding #{path} (matched custom glob pattern: #{matched_glob_pattern})" }
|
255
|
+
return true
|
256
|
+
end
|
257
|
+
|
258
|
+
false
|
344
259
|
end
|
345
260
|
|
346
261
|
def process_content_to_output(output)
|
347
262
|
@logger.debug "Using thread pool with #{@options[:threads]} threads"
|
348
263
|
buffer = []
|
349
264
|
progress = ProgressIndicator.new(@repo_files.size, @logger)
|
350
|
-
thread_buffers =
|
351
|
-
mutex = Mutex.new
|
352
|
-
errors =
|
265
|
+
thread_buffers = Concurrent::Map.new # Thread-safe map for buffers
|
266
|
+
mutex = Mutex.new # Mutex for shared buffer and output operations
|
267
|
+
errors = Concurrent::Array.new # Thread-safe array for errors
|
353
268
|
pool = Concurrent::FixedThreadPool.new(@options[:threads])
|
354
269
|
prioritized_files = prioritize_files(@repo_files)
|
355
270
|
|
@@ -380,9 +295,10 @@ module Gitingest
|
|
380
295
|
end
|
381
296
|
|
382
297
|
pool.shutdown
|
383
|
-
pool.wait_for_termination(@options[:thread_timeout])
|
384
|
-
|
385
|
-
|
298
|
+
unless pool.wait_for_termination(@options[:thread_timeout])
|
299
|
+
@logger.warn "Thread pool did not shut down gracefully within #{@options[:thread_timeout]}s, forcing termination."
|
300
|
+
pool.kill
|
301
|
+
end
|
386
302
|
|
387
303
|
mutex.synchronize do
|
388
304
|
thread_buffers.each_value { |local_buffer| buffer.concat(local_buffer) unless local_buffer.empty? }
|
@@ -459,12 +375,14 @@ module Gitingest
|
|
459
375
|
elapsed = now - @start_time
|
460
376
|
progress_chars = (BAR_WIDTH * (current.to_f / @total)).round
|
461
377
|
bar = "[#{"|" * progress_chars}#{" " * (BAR_WIDTH - progress_chars)}]"
|
462
|
-
|
463
|
-
rate =
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
378
|
+
|
379
|
+
rate = if elapsed.positive?
|
380
|
+
(current / elapsed).round(1)
|
381
|
+
else
|
382
|
+
0 # Avoid division by zero if elapsed time is zero
|
383
|
+
end
|
384
|
+
eta_string = current.positive? && percent < 100 && rate.positive? ? " ETA: #{format_time((@total - current) / rate)}" : ""
|
385
|
+
|
468
386
|
print "\r\e[K#{bar} #{percent}% | #{current}/#{@total} files (#{rate} files/sec)#{eta_string}"
|
469
387
|
print "\n" if current == @total
|
470
388
|
if (percent % 10).zero? && percent != @last_percent || current == @total
|
data/lib/gitingest/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gitingest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.3
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Davide Santangelo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-04
|
11
|
+
date: 2025-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: concurrent-ruby
|