gitingest 0.6.3 → 0.7.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7f6ba5c5ad2194b133eba160ccfe3e5a09313174eabe75aa001b8e6504822961
4
- data.tar.gz: 72e8160b0cfc168e8503578a611c3592b0f8b7cadd25f1c14d86401e41f768aa
3
+ metadata.gz: 8823c78db091723b50cdeebcb51cc716315d2a3351e295badfa80b0cb48270f7
4
+ data.tar.gz: 530854838bff4d35f40a22d355cf256db244dbedd0a0be70a88f067e19f9ec5e
5
5
  SHA512:
6
- metadata.gz: c332a1d0c70e8bc5cdf9583830b27568b0bad48bde4579fce40634d139f47d77b1dd68d39313142ecb474d8dcfb36cbb2f1f42d8e51e58a64c7fa994822fe6ea
7
- data.tar.gz: 89407c94365c307071e39dd7e598d7e1cc2ea1afc6206fea92bacdc66ae1e85ac8feba6e95d89238cd6444eb9b45251d71dcf2e2ef365fa73472489e376aceb9
6
+ metadata.gz: 3fb379041c49627197e47fa3df1b59f6a1a553772e2a162fb38fe89d6eca61f5b0f6cdf1c9ccaf5857fab709b6d7eb3d773375d2db60f409a6df6dff6823fffa
7
+ data.tar.gz: caa01a9ea924ec97127c8e75a9d1abb3511a93aef3f8a21714910dde7c133f4ffccfea35004d49d1d04d87f844807d596bdf2a956667d1647e0737db084e3e20
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.7.1] - 2025-06-20
4
+
5
+ ### Changed
6
+ - Refactored file prioritization logic to use a `case` statement for improved readability and maintainability.
7
+
8
+ ## [0.7.0] - 2025-06-04
9
+
10
+ ### Changed
11
+ - Improved file exclusion logic for glob patterns to correctly match files at any directory depth (e.g., `*.md` now correctly matches `docs/file.md`).
12
+ - Refined internal handling of exclusion patterns for clarity and robustness, using `File.fnmatch` for all custom glob patterns.
13
+ - Enhanced debug logging for file exclusion to show the specific pattern that caused a match.
14
+
3
15
  ## [0.6.3] - 2025-04-14
4
16
 
5
17
  ### Fixed
@@ -16,8 +28,6 @@
16
28
  ### Fixed
17
29
  - Fixed error "target of repeat operator is not specified" when using `--exclude` with glob patterns like `*.md`
18
30
 
19
- ---
20
-
21
31
  ## [0.6.0] - 2025-03-18
22
32
 
23
33
  ### Changed
@@ -26,8 +36,6 @@
26
36
  - Updated documentation to reflect the correct default branch behavior
27
37
  - Fixed issues with repository validation in tests
28
38
 
29
- ---
30
-
31
39
  ## [0.5.0] - 2025-03-10
32
40
 
33
41
  ### Added
@@ -40,8 +48,6 @@
40
48
  - Enhanced documentation with directory structure visualization examples
41
49
  - Updated CLI help with the new option
42
50
 
43
- ---
44
-
45
51
  ## [0.4.0] - 2025-03-03
46
52
 
47
53
  ### Added
@@ -60,8 +66,6 @@
60
66
  - Fixed race conditions in progress indicator updates
61
67
  - Addressed timing inconsistencies in multithreaded test scenarios
62
68
 
63
- ---
64
-
65
69
  ## [0.3.1] - 2025-03-03
66
70
 
67
71
  ### Added
@@ -82,8 +86,6 @@
82
86
  - Ensured thread pool shutdown respects the configured timeout.
83
87
  - Resolved potential race conditions in file content retrieval.
84
88
 
85
- ---
86
-
87
89
  ## [0.3.0] - 2025-03-02
88
90
 
89
91
  ### Added
@@ -96,8 +98,6 @@
96
98
  - Optimized thread pool size calculation for improved performance.
97
99
  - Improved error handling in concurrent operations.
98
100
 
99
- ---
100
-
101
101
  ## [0.2.0] - 2025-03-02
102
102
 
103
103
  ### Added
@@ -111,8 +111,6 @@
111
111
  - Enforced a 1000-file limit to prevent memory overload.
112
112
  - Updated version to `0.2.0`.
113
113
 
114
- ---
115
-
116
114
  ## [0.1.0] - 2025-03-02
117
115
 
118
116
  ### Added
data/index.html CHANGED
@@ -716,7 +716,7 @@
716
716
  <div class="header-container">
717
717
  <div class="logo">
718
718
  <div class="logo-icon">G</div>
719
- <div class="logo-text">Gitingest <span class="version-tag">v0.6.3</span></div>
719
+ <div class="logo-text">Gitingest <span class="version-tag">v0.7.1</span></div>
720
720
  </div>
721
721
  <nav>
722
722
  <ul>
@@ -765,7 +765,7 @@
765
765
  <span class="terminal-command">gem install gitingest</span>
766
766
  </div>
767
767
  <div class="terminal-output">
768
- Successfully installed gitingest-0.6.3<br />
768
+ Successfully installed gitingest-0.7.1<br />
769
769
  1 gem installed
770
770
  </div>
771
771
  <div class="terminal-line">
@@ -902,10 +902,48 @@ gitingest --repository user/repo --show-structure</code></pre>
902
902
  <div class="container">
903
903
  <h2>Changelog</h2>
904
904
  <div class="timeline">
905
+ <div class="timeline-item">
906
+ <div class="timeline-date">
907
+ <span class="timeline-month">Jun</span>
908
+ <span class="timeline-day">20</span>
909
+ </div>
910
+ <div class="timeline-content">
911
+ <h3 class="timeline-version">v0.7.1</h3>
912
+ <p class="timeline-desc">Minor internal code refactoring for better readability.</p>
913
+ <div class="timeline-list">
914
+ <ul>
915
+ <li>Refactored the file prioritization logic to be more readable and maintainable.</li>
916
+ </ul>
917
+ </div>
918
+ </div>
919
+ </div>
920
+ <div class="timeline-item">
921
+ <div class="timeline-date">
922
+ <span class="timeline-month">Jun</span>
923
+ <span class="timeline-day">04</span>
924
+ </div>
925
+ <div class="timeline-content">
926
+ <h3 class="timeline-version">v0.7.0</h3>
927
+ <p class="timeline-desc">Improved file exclusion logic and debug logging.</p>
928
+ <div class="timeline-list">
929
+ <ul>
930
+ <li>Improved file exclusion logic for glob patterns to correctly match files at any
931
+ directory depth (e.g., <code>*.md</code> now correctly matches
932
+ <code>docs/file.md</code>).
933
+ </li>
934
+ <li>Refined internal handling of exclusion patterns for clarity and robustness, using
935
+ <code>File.fnmatch</code> for all custom glob patterns.
936
+ </li>
937
+ <li>Enhanced debug logging for file exclusion to show the specific pattern that caused a
938
+ match.</li>
939
+ </ul>
940
+ </div>
941
+ </div>
942
+ </div>
905
943
  <div class="timeline-item">
906
944
  <div class="timeline-date">
907
945
  <span class="timeline-month">Apr</span>
908
- <span class="timeline-day">15</span>
946
+ <span class="timeline-day">14</span>
909
947
  </div>
910
948
  <div class="timeline-content">
911
949
  <h3 class="timeline-version">v0.6.3</h3>
@@ -976,7 +1014,7 @@ gitingest --repository user/repo --show-structure</code></pre>
976
1014
  <div class="timeline-item">
977
1015
  <div class="timeline-date">
978
1016
  <span class="timeline-month">Mar</span>
979
- <span class="timeline-day">04</span>
1017
+ <span class="timeline-day">10</span> <!-- Corrected day based on CHANGELOG.md -->
980
1018
  </div>
981
1019
  <div class="timeline-content">
982
1020
  <h3 class="timeline-version">v0.5.0</h3>
@@ -994,7 +1032,7 @@ gitingest --repository user/repo --show-structure</code></pre>
994
1032
  <div class="timeline-item">
995
1033
  <div class="timeline-date">
996
1034
  <span class="timeline-month">Mar</span>
997
- <span class="timeline-day">04</span>
1035
+ <span class="timeline-day">03</span> <!-- Corrected day based on CHANGELOG.md -->
998
1036
  </div>
999
1037
  <div class="timeline-content">
1000
1038
  <h3 class="timeline-version">v0.4.0</h3>
@@ -1014,16 +1052,17 @@ gitingest --repository user/repo --show-structure</code></pre>
1014
1052
  <span class="timeline-month">Mar</span>
1015
1053
  <span class="timeline-day">03</span>
1016
1054
  </div>
1017
- <div class="timeline-content"></div>
1018
- <h3 class="timeline-version">v0.3.1</h3>
1019
- <p class="timeline-desc">Optimized threading with configurable options and introduced
1020
- exponential backoff for API requests.</p>
1021
- <div class="timeline-list">
1022
- <ul>
1023
- <li>Configurable threading options</li>
1024
- <li>Added thread-local buffers</li>
1025
- <li>Improved file exclusion performance</li>
1026
- </ul>
1055
+ <div class="timeline-content">
1056
+ <h3 class="timeline-version">v0.3.1</h3>
1057
+ <p class="timeline-desc">Optimized threading with configurable options and introduced
1058
+ exponential backoff for API requests.</p>
1059
+ <div class="timeline-list">
1060
+ <ul>
1061
+ <li>Configurable threading options</li>
1062
+ <li>Added thread-local buffers</li>
1063
+ <li>Improved file exclusion performance</li>
1064
+ </ul>
1065
+ </div>
1027
1066
  </div>
1028
1067
  </div>
1029
1068
  </div>
@@ -86,10 +86,12 @@ module Gitingest
86
86
  def initialize(options = {})
87
87
  @options = options
88
88
  @repo_files = []
89
- @excluded_patterns = []
89
+ # @excluded_patterns is assigned further down, once validate_options has run.
90
90
  setup_logger
91
91
  validate_options
92
92
  configure_client
93
+ # Populate @excluded_patterns with raw patterns after options are validated
94
+ @excluded_patterns = DEFAULT_EXCLUDES + @options.fetch(:exclude, [])
93
95
  compile_excluded_patterns
94
96
  end
95
97
 
@@ -150,11 +152,11 @@ module Gitingest
150
152
 
151
153
  @options[:output_file] ||= "#{@options[:repository].split("/").last}_prompt.txt"
152
154
  @options[:branch] ||= :default
153
- @options[:exclude] ||= []
155
+ @options[:exclude] ||= [] # Ensure :exclude is always an array
154
156
  @options[:threads] ||= DEFAULT_THREAD_COUNT
155
157
  @options[:thread_timeout] ||= DEFAULT_THREAD_TIMEOUT
156
158
  @options[:show_structure] ||= false
157
- @excluded_patterns = DEFAULT_EXCLUDES + @options[:exclude]
159
+ # NOTE: @excluded_patterns is assigned in initialize, after validate_options has defaulted @options[:exclude].
158
160
  end
159
161
 
160
162
  def configure_client
@@ -169,63 +171,25 @@ module Gitingest
169
171
 
170
172
  def compile_excluded_patterns
171
173
  @default_patterns = DEFAULT_EXCLUDES.map { |pattern| Regexp.new(pattern) }
172
- @custom_patterns = []
173
- @glob_patterns_with_char_classes = []
174
+ @custom_glob_patterns = [] # For File.fnmatch
175
+ @directory_patterns = []
174
176
 
175
- @options[:exclude].each do |glob_pattern|
176
- if glob_pattern.include?("[") && glob_pattern.include?("]")
177
- @glob_patterns_with_char_classes << glob_pattern
177
+ @options[:exclude].each do |pattern_str|
178
+ if pattern_str.end_with?("/")
179
+ @directory_patterns << pattern_str
178
180
  else
179
- @custom_patterns << Regexp.new(glob_to_regex(glob_pattern))
181
+ # All other custom excludes are treated as glob patterns.
182
+ # If the pattern does not contain a slash, prepend "**/"
183
+ # to make it match at any depth (e.g., "*.md" becomes "**/*.md").
184
+ @custom_glob_patterns << if pattern_str.include?("/")
185
+ pattern_str
186
+ else
187
+ "**/#{pattern_str}"
188
+ end
180
189
  end
181
190
  end
182
191
  end
183
192
 
184
- # Builds a single regex from the combined default and custom exclusion patterns.
185
- # Handles glob patterns and directory patterns (ending with /).
186
- #
187
- # @return [Regexp] The combined exclusion regex.
188
- def build_exclusion_regex
189
- combined_patterns = DEFAULT_EXCLUDES + @options.fetch(:exclude, [])
190
- regex_parts = combined_patterns.map do |pattern|
191
- if pattern.end_with?("/")
192
- # Directory pattern: Match anything starting with this path
193
- "^#{Regexp.escape(pattern)}"
194
- else
195
- # File or glob pattern: Convert glob to regex
196
- glob_to_regex(pattern)
197
- end
198
- end
199
- # Combine all parts, ensuring they match the full path or directory prefix
200
- Regexp.new(regex_parts.join("|"))
201
- end
202
-
203
- # Converts a glob pattern to a Regexp string.
204
- # Handles *, **, ?, and character classes.
205
- # Ensures the pattern matches the entire string by default.
206
- #
207
- # @param glob [String] The glob pattern.
208
- # @return [String] The regex pattern string.
209
- def glob_to_regex(glob)
210
- # More robust glob conversion
211
- regex = glob.gsub(%r{/\*\*($|/)}, '/.*\\1') # Handle **/ and ** at end
212
- .gsub("*", "[^/]*") # Match * within path segments
213
- .gsub("?", "[^/]") # Match ? within path segments
214
- .gsub(".", '\\.') # Escape dots
215
- .gsub("{", "(") # Convert { to ( for grouping
216
- .gsub("}", ")") # Convert } to ) for grouping
217
- .gsub(",", "|") # Convert , to | for OR within groups
218
-
219
- # Ensure the pattern matches the full path unless it was originally a directory pattern
220
- # (which is handled separately now) or contains wildcards suggesting partial match.
221
- # If it contains no wildcards, anchor it.
222
- if glob.include?("*") || glob.include?("?") || glob.include?("{")
223
- regex # Allow partial matching for wildcards
224
- else
225
- "^#{regex}$" # Anchor exact matches
226
- end
227
- end
228
-
229
193
  def fetch_repository_contents
230
194
  @logger.info "Fetching repository: #{@options[:repository]} (branch: #{@options[:branch]})"
231
195
  validate_repository_access
@@ -269,87 +233,38 @@ module Gitingest
269
233
  return true if path.match?(DOT_FILE_PATTERN)
270
234
 
271
235
  # Check for directory exclusion patterns (ending with '/')
272
- @options[:exclude].each do |pattern|
273
- if pattern.end_with?("/") && path.start_with?(pattern)
274
- @logger.debug "Excluding #{path} (matched directory pattern #{pattern})" if @logger.debug?
275
- return true
276
- end
236
+ matched_dir_pattern = @directory_patterns.find { |dir_pattern| path.start_with?(dir_pattern) }
237
+ if matched_dir_pattern
238
+ @logger.debug { "Excluding #{path} (matched directory pattern: #{matched_dir_pattern})" }
239
+ return true
277
240
  end
278
241
 
279
- # Continue with regular pattern checks
280
- return true if @default_patterns.any? { |pattern| path.match?(pattern) }
281
- return true if @custom_patterns.any? { |pattern| path.match?(pattern) }
282
-
283
- @glob_patterns_with_char_classes.any? { |glob_pattern| glob_match?(glob_pattern, path) }
284
- end
285
-
286
- def glob_match?(pattern, string)
287
- return true if pattern == string
288
- return false if !pattern.match?(/[*?\[]/) && pattern != string
289
-
290
- pattern_idx = 0
291
- string_idx = 0
292
-
293
- while pattern_idx < pattern.length && string_idx < string.length
294
- case pattern[pattern_idx]
295
- when "*"
296
- pattern_idx += 1 while pattern_idx + 1 < pattern.length && pattern[pattern_idx + 1] == "*"
297
- return true if pattern_idx == pattern.length - 1
298
-
299
- next_char = pattern[pattern_idx + 1]
300
- pattern_idx += 1
301
- while string_idx < string.length
302
- break if string[string_idx] == next_char || next_char == "?" ||
303
- (next_char == "[" && char_class_match?(pattern, pattern_idx, string[string_idx]))
304
-
305
- string_idx += 1
306
- end
307
- when "?" then string_idx += 1
308
- pattern_idx += 1
309
- when "["
310
- return false unless char_class_match?(pattern, pattern_idx, string[string_idx])
311
-
312
- pattern_idx += 1
313
- pattern_idx += 1 while pattern_idx < pattern.length && pattern[pattern_idx] != "]"
314
- pattern_idx += 1
315
- string_idx += 1
316
- when string[string_idx] then string_idx += 1
317
- pattern_idx += 1
318
- else return false
319
- end
242
+ # Check default regex patterns
243
+ matched_default_pattern = @default_patterns.find { |pattern| path.match?(pattern) }
244
+ if matched_default_pattern
245
+ @logger.debug { "Excluding #{path} (matched default pattern: #{matched_default_pattern.source})" }
246
+ return true
320
247
  end
321
248
 
322
- pattern_idx += 1 while pattern_idx < pattern.length && pattern[pattern_idx] == "*"
323
- pattern_idx == pattern.length && string_idx == string.length
324
- end
325
-
326
- def char_class_match?(pattern, class_start_idx, char)
327
- idx = class_start_idx + 1
328
- match = false
329
- negate = pattern[idx] == "^" && (idx += 1)
330
-
331
- while idx < pattern.length && pattern[idx] != "]"
332
- if idx + 2 < pattern.length && pattern[idx + 1] == "-"
333
- range_start = pattern[idx]
334
- range_end = pattern[idx + 2]
335
- match = true if char >= range_start && char <= range_end
336
- idx += 3
337
- else
338
- match = true if pattern[idx] == char
339
- idx += 1
340
- end
341
- break if match
249
+ # Check custom glob patterns using File.fnmatch
250
+ matched_glob_pattern = @custom_glob_patterns.find do |glob_pattern|
251
+ File.fnmatch(glob_pattern, path, File::FNM_PATHNAME | File::FNM_DOTMATCH)
342
252
  end
343
- negate ? !match : match
253
+ if matched_glob_pattern
254
+ @logger.debug { "Excluding #{path} (matched custom glob pattern: #{matched_glob_pattern})" }
255
+ return true
256
+ end
257
+
258
+ false
344
259
  end
345
260
 
346
261
  def process_content_to_output(output)
347
262
  @logger.debug "Using thread pool with #{@options[:threads]} threads"
348
263
  buffer = []
349
264
  progress = ProgressIndicator.new(@repo_files.size, @logger)
350
- thread_buffers = {}
351
- mutex = Mutex.new
352
- errors = []
265
+ thread_buffers = Concurrent::Map.new # Thread-safe map for buffers
266
+ mutex = Mutex.new # Mutex for shared buffer and output operations
267
+ errors = Concurrent::Array.new # Thread-safe array for errors
353
268
  pool = Concurrent::FixedThreadPool.new(@options[:threads])
354
269
  prioritized_files = prioritize_files(@repo_files)
355
270
 
@@ -380,9 +295,10 @@ module Gitingest
380
295
  end
381
296
 
382
297
  pool.shutdown
383
- pool.wait_for_termination(@options[:thread_timeout]) || (@logger.warn "Thread pool timeout, forcing termination"
384
-
385
- pool.kill)
298
+ unless pool.wait_for_termination(@options[:thread_timeout])
299
+ @logger.warn "Thread pool did not shut down gracefully within #{@options[:thread_timeout]}s, forcing termination."
300
+ pool.kill
301
+ end
386
302
 
387
303
  mutex.synchronize do
388
304
  thread_buffers.each_value { |local_buffer| buffer.concat(local_buffer) unless local_buffer.empty? }
@@ -426,11 +342,14 @@ module Gitingest
426
342
 
427
343
  def prioritize_files(files)
428
344
  files.sort_by do |file|
429
- path = file.path.downcase
430
- if path.end_with?(".md", ".txt", ".json", ".yaml", ".yml") then 0
431
- elsif path.end_with?(".rb", ".py", ".js", ".ts", ".go", ".java", ".c", ".cpp", ".h") then 1
345
+ ext = File.extname(file.path.downcase)
346
+ case ext
347
+ when ".md", ".txt", ".json", ".yaml", ".yml"
348
+ 0 # Documentation and data files first
349
+ when ".rb", ".py", ".js", ".ts", ".go", ".java", ".c", ".cpp", ".h"
350
+ 1 # Source code files second
432
351
  else
433
- 2
352
+ 2 # Other files last
434
353
  end
435
354
  end
436
355
  end
@@ -459,12 +378,14 @@ module Gitingest
459
378
  elapsed = now - @start_time
460
379
  progress_chars = (BAR_WIDTH * (current.to_f / @total)).round
461
380
  bar = "[#{"|" * progress_chars}#{" " * (BAR_WIDTH - progress_chars)}]"
462
- eta_string = current > 1 && percent < 100 ? " ETA: #{format_time((elapsed / current) * (@total - current))}" : ""
463
- rate = begin
464
- (current / elapsed).round(1)
465
- rescue StandardError
466
- 0
467
- end
381
+
382
+ rate = if elapsed.positive?
383
+ (current / elapsed).round(1)
384
+ else
385
+ 0 # Avoid division by zero if elapsed time is zero
386
+ end
387
+ eta_string = current.positive? && percent < 100 && rate.positive? ? " ETA: #{format_time((@total - current) / rate)}" : ""
388
+
468
389
  print "\r\e[K#{bar} #{percent}% | #{current}/#{@total} files (#{rate} files/sec)#{eta_string}"
469
390
  print "\n" if current == @total
470
391
  if (percent % 10).zero? && percent != @last_percent || current == @total
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gitingest
4
- VERSION = "0.6.3"
4
+ VERSION = "0.7.1"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitingest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.3
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Davide Santangelo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-04-14 00:00:00.000000000 Z
11
+ date: 2025-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby