gitingest 0.6.3 → 0.7.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7f6ba5c5ad2194b133eba160ccfe3e5a09313174eabe75aa001b8e6504822961
4
- data.tar.gz: 72e8160b0cfc168e8503578a611c3592b0f8b7cadd25f1c14d86401e41f768aa
3
+ metadata.gz: 0c5544ec1d956710315693490fddc60f26452452ca674739d9a343f0418b26c6
4
+ data.tar.gz: 6d9dd377e78788bf3f3b0a6511cc38cfc9a5e85bba787f905f879235d767dc8c
5
5
  SHA512:
6
- metadata.gz: c332a1d0c70e8bc5cdf9583830b27568b0bad48bde4579fce40634d139f47d77b1dd68d39313142ecb474d8dcfb36cbb2f1f42d8e51e58a64c7fa994822fe6ea
7
- data.tar.gz: 89407c94365c307071e39dd7e598d7e1cc2ea1afc6206fea92bacdc66ae1e85ac8feba6e95d89238cd6444eb9b45251d71dcf2e2ef365fa73472489e376aceb9
6
+ metadata.gz: 96154003ef92d35503ddb9488ee25f147c0809f88226a3554d818e43b6692908fae94e00d56a525a8c31327f7be3b0323486bb79227dac5fe36709cd99f09a11
7
+ data.tar.gz: 25d6c8de3537f84f27feedc6fb3ec812e14ad3ebf1cbe708b3931e757741d00a69d7e43a86f08bb5198cf1532f92b44d64bdf9c53afa2ec802c7013cb54bf366
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.7.0] - 2025-06-04
4
+
5
+ ### Changed
6
+ - Improved file exclusion logic for glob patterns to correctly match files at any directory depth (e.g., `*.md` now correctly matches `docs/file.md`).
7
+ - Refined internal handling of exclusion patterns for clarity and robustness, using `File.fnmatch` for all custom glob patterns.
8
+ - Enhanced debug logging for file exclusion to show the specific pattern that caused a match.
9
+
3
10
  ## [0.6.3] - 2025-04-14
4
11
 
5
12
  ### Fixed
@@ -16,8 +23,6 @@
16
23
  ### Fixed
17
24
  - Fixed error "target of repeat operator is not specified" when using `--exclude` with glob patterns like `*.md`
18
25
 
19
- ---
20
-
21
26
  ## [0.6.0] - 2025-03-18
22
27
 
23
28
  ### Changed
@@ -26,8 +31,6 @@
26
31
  - Updated documentation to reflect the correct default branch behavior
27
32
  - Fixed issues with repository validation in tests
28
33
 
29
- ---
30
-
31
34
  ## [0.5.0] - 2025-03-10
32
35
 
33
36
  ### Added
@@ -40,8 +43,6 @@
40
43
  - Enhanced documentation with directory structure visualization examples
41
44
  - Updated CLI help with the new option
42
45
 
43
- ---
44
-
45
46
  ## [0.4.0] - 2025-03-03
46
47
 
47
48
  ### Added
@@ -60,8 +61,6 @@
60
61
  - Fixed race conditions in progress indicator updates
61
62
  - Addressed timing inconsistencies in multithreaded test scenarios
62
63
 
63
- ---
64
-
65
64
  ## [0.3.1] - 2025-03-03
66
65
 
67
66
  ### Added
@@ -82,8 +81,6 @@
82
81
  - Ensured thread pool shutdown respects the configured timeout.
83
82
  - Resolved potential race conditions in file content retrieval.
84
83
 
85
- ---
86
-
87
84
  ## [0.3.0] - 2025-03-02
88
85
 
89
86
  ### Added
@@ -96,8 +93,6 @@
96
93
  - Optimized thread pool size calculation for improved performance.
97
94
  - Improved error handling in concurrent operations.
98
95
 
99
- ---
100
-
101
96
  ## [0.2.0] - 2025-03-02
102
97
 
103
98
  ### Added
@@ -111,8 +106,6 @@
111
106
  - Enforced a 1000-file limit to prevent memory overload.
112
107
  - Updated version to `0.2.0`.
113
108
 
114
- ---
115
-
116
109
  ## [0.1.0] - 2025-03-02
117
110
 
118
111
  ### Added
data/index.html CHANGED
@@ -716,7 +716,7 @@
716
716
  <div class="header-container">
717
717
  <div class="logo">
718
718
  <div class="logo-icon">G</div>
719
- <div class="logo-text">Gitingest <span class="version-tag">v0.6.3</span></div>
719
+ <div class="logo-text">Gitingest <span class="version-tag">v0.7.0</span></div>
720
720
  </div>
721
721
  <nav>
722
722
  <ul>
@@ -765,7 +765,7 @@
765
765
  <span class="terminal-command">gem install gitingest</span>
766
766
  </div>
767
767
  <div class="terminal-output">
768
- Successfully installed gitingest-0.6.3<br />
768
+ Successfully installed gitingest-0.7.0<br />
769
769
  1 gem installed
770
770
  </div>
771
771
  <div class="terminal-line">
@@ -902,10 +902,33 @@ gitingest --repository user/repo --show-structure</code></pre>
902
902
  <div class="container">
903
903
  <h2>Changelog</h2>
904
904
  <div class="timeline">
905
+ <div class="timeline-item">
906
+ <div class="timeline-date">
907
+ <span class="timeline-month">Jun</span>
908
+ <span class="timeline-day">04</span>
909
+ </div>
910
+ <div class="timeline-content">
911
+ <h3 class="timeline-version">v0.7.0</h3>
912
+ <p class="timeline-desc">Improved file exclusion logic and debug logging.</p>
913
+ <div class="timeline-list">
914
+ <ul>
915
+ <li>Improved file exclusion logic for glob patterns to correctly match files at any
916
+ directory depth (e.g., <code>*.md</code> now correctly matches
917
+ <code>docs/file.md</code>).
918
+ </li>
919
+ <li>Refined internal handling of exclusion patterns for clarity and robustness, using
920
+ <code>File.fnmatch</code> for all custom glob patterns.
921
+ </li>
922
+ <li>Enhanced debug logging for file exclusion to show the specific pattern that caused a
923
+ match.</li>
924
+ </ul>
925
+ </div>
926
+ </div>
927
+ </div>
905
928
  <div class="timeline-item">
906
929
  <div class="timeline-date">
907
930
  <span class="timeline-month">Apr</span>
908
- <span class="timeline-day">15</span>
931
+ <span class="timeline-day">14</span>
909
932
  </div>
910
933
  <div class="timeline-content">
911
934
  <h3 class="timeline-version">v0.6.3</h3>
@@ -976,7 +999,7 @@ gitingest --repository user/repo --show-structure</code></pre>
976
999
  <div class="timeline-item">
977
1000
  <div class="timeline-date">
978
1001
  <span class="timeline-month">Mar</span>
979
- <span class="timeline-day">04</span>
1002
+ <span class="timeline-day">10</span> <!-- Corrected day based on CHANGELOG.md -->
980
1003
  </div>
981
1004
  <div class="timeline-content">
982
1005
  <h3 class="timeline-version">v0.5.0</h3>
@@ -994,7 +1017,7 @@ gitingest --repository user/repo --show-structure</code></pre>
994
1017
  <div class="timeline-item">
995
1018
  <div class="timeline-date">
996
1019
  <span class="timeline-month">Mar</span>
997
- <span class="timeline-day">04</span>
1020
+ <span class="timeline-day">03</span> <!-- Corrected day based on CHANGELOG.md -->
998
1021
  </div>
999
1022
  <div class="timeline-content">
1000
1023
  <h3 class="timeline-version">v0.4.0</h3>
@@ -1014,16 +1037,17 @@ gitingest --repository user/repo --show-structure</code></pre>
1014
1037
  <span class="timeline-month">Mar</span>
1015
1038
  <span class="timeline-day">03</span>
1016
1039
  </div>
1017
- <div class="timeline-content"></div>
1018
- <h3 class="timeline-version">v0.3.1</h3>
1019
- <p class="timeline-desc">Optimized threading with configurable options and introduced
1020
- exponential backoff for API requests.</p>
1021
- <div class="timeline-list">
1022
- <ul>
1023
- <li>Configurable threading options</li>
1024
- <li>Added thread-local buffers</li>
1025
- <li>Improved file exclusion performance</li>
1026
- </ul>
1040
+ <div class="timeline-content">
1041
+ <h3 class="timeline-version">v0.3.1</h3>
1042
+ <p class="timeline-desc">Optimized threading with configurable options and introduced
1043
+ exponential backoff for API requests.</p>
1044
+ <div class="timeline-list">
1045
+ <ul>
1046
+ <li>Configurable threading options</li>
1047
+ <li>Added thread-local buffers</li>
1048
+ <li>Improved file exclusion performance</li>
1049
+ </ul>
1050
+ </div>
1027
1051
  </div>
1028
1052
  </div>
1029
1053
  </div>
@@ -86,10 +86,12 @@ module Gitingest
86
86
  def initialize(options = {})
87
87
  @options = options
88
88
  @repo_files = []
89
- @excluded_patterns = []
89
+ # @excluded_patterns = [] # This will be set after validate_options
90
90
  setup_logger
91
91
  validate_options
92
92
  configure_client
93
+ # Populate @excluded_patterns with raw patterns after options are validated
94
+ @excluded_patterns = DEFAULT_EXCLUDES + @options.fetch(:exclude, [])
93
95
  compile_excluded_patterns
94
96
  end
95
97
 
@@ -150,11 +152,11 @@ module Gitingest
150
152
 
151
153
  @options[:output_file] ||= "#{@options[:repository].split("/").last}_prompt.txt"
152
154
  @options[:branch] ||= :default
153
- @options[:exclude] ||= []
155
+ @options[:exclude] ||= [] # Ensure :exclude is always an array
154
156
  @options[:threads] ||= DEFAULT_THREAD_COUNT
155
157
  @options[:thread_timeout] ||= DEFAULT_THREAD_TIMEOUT
156
158
  @options[:show_structure] ||= false
157
- @excluded_patterns = DEFAULT_EXCLUDES + @options[:exclude]
159
+ # NOTE: @excluded_patterns is assigned in initialize, after validate_options has run.
158
160
  end
159
161
 
160
162
  def configure_client
@@ -169,63 +171,25 @@ module Gitingest
169
171
 
170
172
  def compile_excluded_patterns
171
173
  @default_patterns = DEFAULT_EXCLUDES.map { |pattern| Regexp.new(pattern) }
172
- @custom_patterns = []
173
- @glob_patterns_with_char_classes = []
174
+ @custom_glob_patterns = [] # For File.fnmatch
175
+ @directory_patterns = []
174
176
 
175
- @options[:exclude].each do |glob_pattern|
176
- if glob_pattern.include?("[") && glob_pattern.include?("]")
177
- @glob_patterns_with_char_classes << glob_pattern
177
+ @options[:exclude].each do |pattern_str|
178
+ if pattern_str.end_with?("/")
179
+ @directory_patterns << pattern_str
178
180
  else
179
- @custom_patterns << Regexp.new(glob_to_regex(glob_pattern))
181
+ # All other custom excludes are treated as glob patterns.
182
+ # If the pattern does not contain a slash, prepend "**/"
183
+ # to make it match at any depth (e.g., "*.md" becomes "**/*.md").
184
+ @custom_glob_patterns << if pattern_str.include?("/")
185
+ pattern_str
186
+ else
187
+ "**/#{pattern_str}"
188
+ end
180
189
  end
181
190
  end
182
191
  end
183
192
 
184
- # Builds a single regex from the combined default and custom exclusion patterns.
185
- # Handles glob patterns and directory patterns (ending with /).
186
- #
187
- # @return [Regexp] The combined exclusion regex.
188
- def build_exclusion_regex
189
- combined_patterns = DEFAULT_EXCLUDES + @options.fetch(:exclude, [])
190
- regex_parts = combined_patterns.map do |pattern|
191
- if pattern.end_with?("/")
192
- # Directory pattern: Match anything starting with this path
193
- "^#{Regexp.escape(pattern)}"
194
- else
195
- # File or glob pattern: Convert glob to regex
196
- glob_to_regex(pattern)
197
- end
198
- end
199
- # Combine all parts, ensuring they match the full path or directory prefix
200
- Regexp.new(regex_parts.join("|"))
201
- end
202
-
203
- # Converts a glob pattern to a Regexp string.
204
- # Handles *, **, ?, and character classes.
205
- # Ensures the pattern matches the entire string by default.
206
- #
207
- # @param glob [String] The glob pattern.
208
- # @return [String] The regex pattern string.
209
- def glob_to_regex(glob)
210
- # More robust glob conversion
211
- regex = glob.gsub(%r{/\*\*($|/)}, '/.*\\1') # Handle **/ and ** at end
212
- .gsub("*", "[^/]*") # Match * within path segments
213
- .gsub("?", "[^/]") # Match ? within path segments
214
- .gsub(".", '\\.') # Escape dots
215
- .gsub("{", "(") # Convert { to ( for grouping
216
- .gsub("}", ")") # Convert } to ) for grouping
217
- .gsub(",", "|") # Convert , to | for OR within groups
218
-
219
- # Ensure the pattern matches the full path unless it was originally a directory pattern
220
- # (which is handled separately now) or contains wildcards suggesting partial match.
221
- # If it contains no wildcards, anchor it.
222
- if glob.include?("*") || glob.include?("?") || glob.include?("{")
223
- regex # Allow partial matching for wildcards
224
- else
225
- "^#{regex}$" # Anchor exact matches
226
- end
227
- end
228
-
229
193
  def fetch_repository_contents
230
194
  @logger.info "Fetching repository: #{@options[:repository]} (branch: #{@options[:branch]})"
231
195
  validate_repository_access
@@ -269,87 +233,38 @@ module Gitingest
269
233
  return true if path.match?(DOT_FILE_PATTERN)
270
234
 
271
235
  # Check for directory exclusion patterns (ending with '/')
272
- @options[:exclude].each do |pattern|
273
- if pattern.end_with?("/") && path.start_with?(pattern)
274
- @logger.debug "Excluding #{path} (matched directory pattern #{pattern})" if @logger.debug?
275
- return true
276
- end
236
+ matched_dir_pattern = @directory_patterns.find { |dir_pattern| path.start_with?(dir_pattern) }
237
+ if matched_dir_pattern
238
+ @logger.debug { "Excluding #{path} (matched directory pattern: #{matched_dir_pattern})" }
239
+ return true
277
240
  end
278
241
 
279
- # Continue with regular pattern checks
280
- return true if @default_patterns.any? { |pattern| path.match?(pattern) }
281
- return true if @custom_patterns.any? { |pattern| path.match?(pattern) }
282
-
283
- @glob_patterns_with_char_classes.any? { |glob_pattern| glob_match?(glob_pattern, path) }
284
- end
285
-
286
- def glob_match?(pattern, string)
287
- return true if pattern == string
288
- return false if !pattern.match?(/[*?\[]/) && pattern != string
289
-
290
- pattern_idx = 0
291
- string_idx = 0
292
-
293
- while pattern_idx < pattern.length && string_idx < string.length
294
- case pattern[pattern_idx]
295
- when "*"
296
- pattern_idx += 1 while pattern_idx + 1 < pattern.length && pattern[pattern_idx + 1] == "*"
297
- return true if pattern_idx == pattern.length - 1
298
-
299
- next_char = pattern[pattern_idx + 1]
300
- pattern_idx += 1
301
- while string_idx < string.length
302
- break if string[string_idx] == next_char || next_char == "?" ||
303
- (next_char == "[" && char_class_match?(pattern, pattern_idx, string[string_idx]))
304
-
305
- string_idx += 1
306
- end
307
- when "?" then string_idx += 1
308
- pattern_idx += 1
309
- when "["
310
- return false unless char_class_match?(pattern, pattern_idx, string[string_idx])
311
-
312
- pattern_idx += 1
313
- pattern_idx += 1 while pattern_idx < pattern.length && pattern[pattern_idx] != "]"
314
- pattern_idx += 1
315
- string_idx += 1
316
- when string[string_idx] then string_idx += 1
317
- pattern_idx += 1
318
- else return false
319
- end
242
+ # Check default regex patterns
243
+ matched_default_pattern = @default_patterns.find { |pattern| path.match?(pattern) }
244
+ if matched_default_pattern
245
+ @logger.debug { "Excluding #{path} (matched default pattern: #{matched_default_pattern.source})" }
246
+ return true
320
247
  end
321
248
 
322
- pattern_idx += 1 while pattern_idx < pattern.length && pattern[pattern_idx] == "*"
323
- pattern_idx == pattern.length && string_idx == string.length
324
- end
325
-
326
- def char_class_match?(pattern, class_start_idx, char)
327
- idx = class_start_idx + 1
328
- match = false
329
- negate = pattern[idx] == "^" && (idx += 1)
330
-
331
- while idx < pattern.length && pattern[idx] != "]"
332
- if idx + 2 < pattern.length && pattern[idx + 1] == "-"
333
- range_start = pattern[idx]
334
- range_end = pattern[idx + 2]
335
- match = true if char >= range_start && char <= range_end
336
- idx += 3
337
- else
338
- match = true if pattern[idx] == char
339
- idx += 1
340
- end
341
- break if match
249
+ # Check custom glob patterns using File.fnmatch
250
+ matched_glob_pattern = @custom_glob_patterns.find do |glob_pattern|
251
+ File.fnmatch(glob_pattern, path, File::FNM_PATHNAME | File::FNM_DOTMATCH)
342
252
  end
343
- negate ? !match : match
253
+ if matched_glob_pattern
254
+ @logger.debug { "Excluding #{path} (matched custom glob pattern: #{matched_glob_pattern})" }
255
+ return true
256
+ end
257
+
258
+ false
344
259
  end
345
260
 
346
261
  def process_content_to_output(output)
347
262
  @logger.debug "Using thread pool with #{@options[:threads]} threads"
348
263
  buffer = []
349
264
  progress = ProgressIndicator.new(@repo_files.size, @logger)
350
- thread_buffers = {}
351
- mutex = Mutex.new
352
- errors = []
265
+ thread_buffers = Concurrent::Map.new # Thread-safe map for buffers
266
+ mutex = Mutex.new # Mutex for shared buffer and output operations
267
+ errors = Concurrent::Array.new # Thread-safe array for errors
353
268
  pool = Concurrent::FixedThreadPool.new(@options[:threads])
354
269
  prioritized_files = prioritize_files(@repo_files)
355
270
 
@@ -380,9 +295,10 @@ module Gitingest
380
295
  end
381
296
 
382
297
  pool.shutdown
383
- pool.wait_for_termination(@options[:thread_timeout]) || (@logger.warn "Thread pool timeout, forcing termination"
384
-
385
- pool.kill)
298
+ unless pool.wait_for_termination(@options[:thread_timeout])
299
+ @logger.warn "Thread pool did not shut down gracefully within #{@options[:thread_timeout]}s, forcing termination."
300
+ pool.kill
301
+ end
386
302
 
387
303
  mutex.synchronize do
388
304
  thread_buffers.each_value { |local_buffer| buffer.concat(local_buffer) unless local_buffer.empty? }
@@ -459,12 +375,14 @@ module Gitingest
459
375
  elapsed = now - @start_time
460
376
  progress_chars = (BAR_WIDTH * (current.to_f / @total)).round
461
377
  bar = "[#{"|" * progress_chars}#{" " * (BAR_WIDTH - progress_chars)}]"
462
- eta_string = current > 1 && percent < 100 ? " ETA: #{format_time((elapsed / current) * (@total - current))}" : ""
463
- rate = begin
464
- (current / elapsed).round(1)
465
- rescue StandardError
466
- 0
467
- end
378
+
379
+ rate = if elapsed.positive?
380
+ (current / elapsed).round(1)
381
+ else
382
+ 0 # Avoid division by zero if elapsed time is zero
383
+ end
384
+ eta_string = current.positive? && percent < 100 && rate.positive? ? " ETA: #{format_time((@total - current) / rate)}" : ""
385
+
468
386
  print "\r\e[K#{bar} #{percent}% | #{current}/#{@total} files (#{rate} files/sec)#{eta_string}"
469
387
  print "\n" if current == @total
470
388
  if (percent % 10).zero? && percent != @last_percent || current == @total
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gitingest
4
- VERSION = "0.6.3"
4
+ VERSION = "0.7.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitingest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.3
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Davide Santangelo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-04-14 00:00:00.000000000 Z
11
+ date: 2025-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby