gitingest 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/index.html +20 -2
- data/lib/gitingest/generator.rb +156 -179
- data/lib/gitingest/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 016473eb12d22b0847b8a5f45ed24b57a6211cb7a826662a5c29f66f9160033b
         | 
| 4 | 
            +
              data.tar.gz: abd39a8c8416a9ba60627bda71e0e0c265df3fd46e12fdf4f893c40d627783a9
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 7b7f54743d80d51cb347ff13f4b5fe1cd892143df4867da989bc51b972d094c3bba0068ddd571420440c4a6fa1f7591ebdbb61ddc37f140553cea869f592c3bc
         | 
| 7 | 
            +
              data.tar.gz: d1797eef97f58f9119d474e6fdd18be3b1670b6c3bbbad35866d42f0a8f0e7066f886f7ac6ddc1da0cf7d9bd552f4f962cbc5e9b0360d04e3952c102f09877ce
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/index.html
    CHANGED
    
    | @@ -716,7 +716,7 @@ | |
| 716 716 | 
             
                    <div class="header-container">
         | 
| 717 717 | 
             
                        <div class="logo">
         | 
| 718 718 | 
             
                            <div class="logo-icon">G</div>
         | 
| 719 | 
            -
                            <div class="logo-text">Gitingest <span class="version-tag">v0.6.0</span></div>
         | 
| 719 | 
            +
                            <div class="logo-text">Gitingest <span class="version-tag">v0.6.1</span></div>
         | 
| 720 720 | 
             
                        </div>
         | 
| 721 721 | 
             
                        <nav>
         | 
| 722 722 | 
             
                            <ul>
         | 
| @@ -765,7 +765,7 @@ | |
| 765 765 | 
             
                                    <span class="terminal-command">gem install gitingest</span>
         | 
| 766 766 | 
             
                                </div>
         | 
| 767 767 | 
             
                                <div class="terminal-output">
         | 
| 768 | 
            -
                                    Successfully installed gitingest-0. | 
| 768 | 
            +
                                    Successfully installed gitingest-0.6.1<br />
         | 
| 769 769 | 
             
                                    1 gem installed
         | 
| 770 770 | 
             
                                </div>
         | 
| 771 771 | 
             
                                <div class="terminal-line">
         | 
| @@ -902,6 +902,24 @@ gitingest --repository user/repo --show-structure</code></pre> | |
| 902 902 | 
             
                    <div class="container">
         | 
| 903 903 | 
             
                        <h2>Changelog</h2>
         | 
| 904 904 | 
             
                        <div class="timeline">
         | 
| 905 | 
            +
                            <div class="timeline-item">
         | 
| 906 | 
            +
                                <div class="timeline-date">
         | 
| 907 | 
            +
                                    <span class="timeline-month">Mar</span>
         | 
| 908 | 
            +
                                    <span class="timeline-day">26</span>
         | 
| 909 | 
            +
                                </div>
         | 
| 910 | 
            +
                                <div class="timeline-content">
         | 
| 911 | 
            +
                                    <h3 class="timeline-version">v0.6.1</h3>
         | 
| 912 | 
            +
                                    <p class="timeline-desc">Fixed error in exclude patterns functionality when using glob patterns.
         | 
| 913 | 
            +
                                    </p>
         | 
| 914 | 
            +
                                    <div class="timeline-list">
         | 
| 915 | 
            +
                                        <ul>
         | 
| 916 | 
            +
                                            <li>Fixed error "target of repeat operator is not specified" when using
         | 
| 917 | 
            +
                                                <code>--exclude</code> with glob patterns like <code>*.md</code>
         | 
| 918 | 
            +
                                            </li>
         | 
| 919 | 
            +
                                        </ul>
         | 
| 920 | 
            +
                                    </div>
         | 
| 921 | 
            +
                                </div>
         | 
| 922 | 
            +
                            </div>
         | 
| 905 923 | 
             
                            <div class="timeline-item">
         | 
| 906 924 | 
             
                                <div class="timeline-date">
         | 
| 907 925 | 
             
                                    <span class="timeline-month">Mar</span>
         | 
    
        data/lib/gitingest/generator.rb
    CHANGED
    
    | @@ -56,7 +56,7 @@ module Gitingest | |
| 56 56 | 
             
                  ".*\.o$", ".*\.obj$", ".*\.dll$", ".*\.dylib$", ".*\.exe$",
         | 
| 57 57 | 
             
                  ".*\.lib$", ".*\.out$", ".*\.a$", ".*\.pdb$", ".*\.nupkg$",
         | 
| 58 58 |  | 
| 59 | 
            -
                  # Language | 
| 59 | 
            +
                  # Language-specific files
         | 
| 60 60 | 
             
                  ".*\.min\.js$", ".*\.min\.css$", ".*\.map$", ".*\.tfstate.*",
         | 
| 61 61 | 
             
                  ".*\.gem$", ".*\.ruby-version", ".*\.ruby-gemset", ".*\.rvmrc",
         | 
| 62 62 | 
             
                  ".*\.rs\.bk$", ".*\.gradle", ".*\.suo", ".*\.user", ".*\.userosscache",
         | 
| @@ -65,38 +65,24 @@ module Gitingest | |
| 65 65 | 
             
                  "\.swiftpm/", "\.build/"
         | 
| 66 66 | 
             
                ].freeze
         | 
| 67 67 |  | 
| 68 | 
            -
                #  | 
| 68 | 
            +
                # Pattern for dot files/directories
         | 
| 69 69 | 
             
                DOT_FILE_PATTERN = %r{(?-mix:(^\.|/\.))}
         | 
| 70 70 |  | 
| 71 71 | 
             
                # Maximum number of files to process to prevent memory overload
         | 
| 72 72 | 
             
                MAX_FILES = 1000
         | 
| 73 73 |  | 
| 74 | 
            -
                #  | 
| 74 | 
            +
                # Buffer size to reduce I/O operations
         | 
| 75 75 | 
             
                BUFFER_SIZE = 250
         | 
| 76 76 |  | 
| 77 | 
            -
                #  | 
| 77 | 
            +
                # Thread-local buffer threshold
         | 
| 78 78 | 
             
                LOCAL_BUFFER_THRESHOLD = 50
         | 
| 79 79 |  | 
| 80 | 
            -
                #  | 
| 80 | 
            +
                # Default threading options
         | 
| 81 81 | 
             
                DEFAULT_THREAD_COUNT = [Concurrent.processor_count, 8].min
         | 
| 82 82 | 
             
                DEFAULT_THREAD_TIMEOUT = 60 # seconds
         | 
| 83 83 |  | 
| 84 84 | 
             
                attr_reader :options, :client, :repo_files, :excluded_patterns, :logger
         | 
| 85 85 |  | 
| 86 | 
            -
                # Initialize a new Generator with the given options
         | 
| 87 | 
            -
                #
         | 
| 88 | 
            -
                # @param options [Hash] Configuration options
         | 
| 89 | 
            -
                # @option options [String] :repository GitHub repository in format "username/repo"
         | 
| 90 | 
            -
                # @option options [String] :token GitHub personal access token
         | 
| 91 | 
            -
                # @option options [String] :branch Repository branch (default: "main")
         | 
| 92 | 
            -
                # @option options [String] :output_file Output file path
         | 
| 93 | 
            -
                # @option options [Array<String>] :exclude Additional patterns to exclude
         | 
| 94 | 
            -
                # @option options [Boolean] :quiet Reduce logging to errors only
         | 
| 95 | 
            -
                # @option options [Boolean] :verbose Increase logging verbosity
         | 
| 96 | 
            -
                # @option options [Logger] :logger Custom logger instance
         | 
| 97 | 
            -
                # @option options [Integer] :threads Number of threads to use (default: auto-detected)
         | 
| 98 | 
            -
                # @option options [Integer] :thread_timeout Seconds to wait for thread pool shutdown (default: 60)
         | 
| 99 | 
            -
                # @option options [Boolean] :show_structure Show repository directory structure (default: false)
         | 
| 100 86 | 
             
                def initialize(options = {})
         | 
| 101 87 | 
             
                  @options = options
         | 
| 102 88 | 
             
                  @repo_files = []
         | 
| @@ -107,68 +93,46 @@ module Gitingest | |
| 107 93 | 
             
                  compile_excluded_patterns
         | 
| 108 94 | 
             
                end
         | 
| 109 95 |  | 
| 110 | 
            -
                # Main execution method for command line
         | 
| 111 96 | 
             
                def run
         | 
| 112 97 | 
             
                  fetch_repository_contents
         | 
| 113 | 
            -
             | 
| 114 98 | 
             
                  if @options[:show_structure]
         | 
| 115 99 | 
             
                    puts generate_directory_structure
         | 
| 116 100 | 
             
                    return
         | 
| 117 101 | 
             
                  end
         | 
| 118 | 
            -
             | 
| 119 102 | 
             
                  generate_file
         | 
| 120 103 | 
             
                end
         | 
| 121 104 |  | 
| 122 | 
            -
                # Generate content and save it to a file
         | 
| 123 | 
            -
                #
         | 
| 124 | 
            -
                # @return [String] Path to the generated file
         | 
| 125 105 | 
             
                def generate_file
         | 
| 126 106 | 
             
                  fetch_repository_contents if @repo_files.empty?
         | 
| 127 | 
            -
             | 
| 128 107 | 
             
                  @logger.info "Generating file for #{@options[:repository]}"
         | 
| 129 108 | 
             
                  File.open(@options[:output_file], "w") do |file|
         | 
| 130 109 | 
             
                    process_content_to_output(file)
         | 
| 131 110 | 
             
                  end
         | 
| 132 | 
            -
             | 
| 133 111 | 
             
                  @logger.info "Prompt generated and saved to #{@options[:output_file]}"
         | 
| 134 112 | 
             
                  @options[:output_file]
         | 
| 135 113 | 
             
                end
         | 
| 136 114 |  | 
| 137 | 
            -
                # Generate content and return it as a string
         | 
| 138 | 
            -
                # Useful for programmatic usage
         | 
| 139 | 
            -
                #
         | 
| 140 | 
            -
                # @return [String] The generated repository content
         | 
| 141 115 | 
             
                def generate_prompt
         | 
| 142 116 | 
             
                  @logger.info "Generating in-memory prompt for #{@options[:repository]}"
         | 
| 143 | 
            -
             | 
| 144 117 | 
             
                  fetch_repository_contents if @repo_files.empty?
         | 
| 145 | 
            -
             | 
| 146 118 | 
             
                  content = StringIO.new
         | 
| 147 119 | 
             
                  process_content_to_output(content)
         | 
| 148 | 
            -
             | 
| 149 120 | 
             
                  result = content.string
         | 
| 150 121 | 
             
                  @logger.info "Generated #{result.size} bytes of content in memory"
         | 
| 151 122 | 
             
                  result
         | 
| 152 123 | 
             
                end
         | 
| 153 124 |  | 
| 154 | 
            -
                # Generate a textual representation of the repository's directory structure
         | 
| 155 | 
            -
                #
         | 
| 156 | 
            -
                # @return [String] The directory structure as a formatted string
         | 
| 157 125 | 
             
                def generate_directory_structure
         | 
| 158 126 | 
             
                  fetch_repository_contents if @repo_files.empty?
         | 
| 159 | 
            -
             | 
| 160 127 | 
             
                  @logger.info "Generating directory structure for #{@options[:repository]}"
         | 
| 161 | 
            -
             | 
| 162 128 | 
             
                  repo_name = @options[:repository].split("/").last
         | 
| 163 129 | 
             
                  structure = DirectoryStructureBuilder.new(repo_name, @repo_files).build
         | 
| 164 | 
            -
             | 
| 165 130 | 
             
                  @logger.info "\n"
         | 
| 166 131 | 
             
                  structure
         | 
| 167 132 | 
             
                end
         | 
| 168 133 |  | 
| 169 134 | 
             
                private
         | 
| 170 135 |  | 
| 171 | 
            -
                # Set up logging based on verbosity options
         | 
| 172 136 | 
             
                def setup_logger
         | 
| 173 137 | 
             
                  @logger = @options[:logger] || Logger.new($stdout)
         | 
| 174 138 | 
             
                  @logger.level = if @options[:quiet]
         | 
| @@ -178,11 +142,9 @@ module Gitingest | |
| 178 142 | 
             
                                  else
         | 
| 179 143 | 
             
                                    Logger::INFO
         | 
| 180 144 | 
             
                                  end
         | 
| 181 | 
            -
                  # Simplify logger format for command line usage
         | 
| 182 145 | 
             
                  @logger.formatter = proc { |severity, _, _, msg| "#{severity == "INFO" ? "" : "[#{severity}] "}#{msg}\n" }
         | 
| 183 146 | 
             
                end
         | 
| 184 147 |  | 
| 185 | 
            -
                # Validate and set default options
         | 
| 186 148 | 
             
                def validate_options
         | 
| 187 149 | 
             
                  raise ArgumentError, "Repository is required" unless @options[:repository]
         | 
| 188 150 |  | 
| @@ -195,10 +157,8 @@ module Gitingest | |
| 195 157 | 
             
                  @excluded_patterns = DEFAULT_EXCLUDES + @options[:exclude]
         | 
| 196 158 | 
             
                end
         | 
| 197 159 |  | 
| 198 | 
            -
                # Configure the GitHub API client
         | 
| 199 160 | 
             
                def configure_client
         | 
| 200 161 | 
             
                  @client = @options[:token] ? Octokit::Client.new(access_token: @options[:token]) : Octokit::Client.new
         | 
| 201 | 
            -
             | 
| 202 162 | 
             
                  if @options[:token]
         | 
| 203 163 | 
             
                    @logger.info "Using provided GitHub token for authentication"
         | 
| 204 164 | 
             
                  else
         | 
| @@ -207,73 +167,152 @@ module Gitingest | |
| 207 167 | 
             
                  end
         | 
| 208 168 | 
             
                end
         | 
| 209 169 |  | 
| 210 | 
            -
                # Optimization: Create a combined regex for faster exclusion checking
         | 
| 211 170 | 
             
                def compile_excluded_patterns
         | 
| 212 | 
            -
                   | 
| 213 | 
            -
                  @ | 
| 171 | 
            +
                  @default_patterns = DEFAULT_EXCLUDES.map { |pattern| Regexp.new(pattern) }
         | 
| 172 | 
            +
                  @custom_patterns = []
         | 
| 173 | 
            +
                  @glob_patterns_with_char_classes = []
         | 
| 174 | 
            +
             | 
| 175 | 
            +
                  @options[:exclude].each do |glob_pattern|
         | 
| 176 | 
            +
                    if glob_pattern.include?("[") && glob_pattern.include?("]")
         | 
| 177 | 
            +
                      @glob_patterns_with_char_classes << glob_pattern
         | 
| 178 | 
            +
                    else
         | 
| 179 | 
            +
                      @custom_patterns << Regexp.new(glob_to_regex(glob_pattern))
         | 
| 180 | 
            +
                    end
         | 
| 181 | 
            +
                  end
         | 
| 182 | 
            +
                end
         | 
| 183 | 
            +
             | 
| 184 | 
            +
                def glob_to_regex(pattern)
         | 
| 185 | 
            +
                  result = "^"
         | 
| 186 | 
            +
                  in_brackets = false
         | 
| 187 | 
            +
                  pattern.each_char do |c|
         | 
| 188 | 
            +
                    case c
         | 
| 189 | 
            +
                    when "[" then in_brackets = true
         | 
| 190 | 
            +
                                  result += c
         | 
| 191 | 
            +
                    when "]" then in_brackets = false
         | 
| 192 | 
            +
                                  result += c
         | 
| 193 | 
            +
                    when "*" then result += in_brackets ? "*" : ".*"
         | 
| 194 | 
            +
                    when ".", "\\", "+", "?", "|", "{", "}", "(", ")", "^", "$" then result += in_brackets ? c : "\\#{c}"
         | 
| 195 | 
            +
                    else result += c
         | 
| 196 | 
            +
                    end
         | 
| 197 | 
            +
                  end
         | 
| 198 | 
            +
                  "#{result}$"
         | 
| 214 199 | 
             
                end
         | 
| 215 200 |  | 
| 216 | 
            -
                # Fetch repository contents and apply exclusion filters
         | 
| 217 201 | 
             
                def fetch_repository_contents
         | 
| 218 202 | 
             
                  @logger.info "Fetching repository: #{@options[:repository]} (branch: #{@options[:branch]})"
         | 
| 219 | 
            -
                   | 
| 220 | 
            -
             | 
| 221 | 
            -
             | 
| 222 | 
            -
             | 
| 223 | 
            -
             | 
| 224 | 
            -
                     | 
| 225 | 
            -
                      @logger.warn "Warning: Found #{@repo_files.size} files, limited to #{MAX_FILES}."
         | 
| 226 | 
            -
                      @repo_files = @repo_files.first(MAX_FILES)
         | 
| 227 | 
            -
                    end
         | 
| 228 | 
            -
                    @logger.info "Found #{@repo_files.size} files after exclusion filters"
         | 
| 229 | 
            -
                  rescue Octokit::Unauthorized
         | 
| 230 | 
            -
                    raise "Authentication error: Invalid or expired GitHub token. Please provide a valid token."
         | 
| 231 | 
            -
                  rescue Octokit::NotFound
         | 
| 232 | 
            -
                    raise "Repository not found: '#{@options[:repository]}' or branch '#{@options[:branch]}' doesn't exist or is private."
         | 
| 233 | 
            -
                  rescue Octokit::Error => e
         | 
| 234 | 
            -
                    raise "Error accessing repository: #{e.message}"
         | 
| 203 | 
            +
                  validate_repository_access
         | 
| 204 | 
            +
                  repo_tree = @client.tree(@options[:repository], @options[:branch], recursive: true)
         | 
| 205 | 
            +
                  @repo_files = repo_tree.tree.select { |item| item.type == "blob" && !excluded_file?(item.path) }
         | 
| 206 | 
            +
                  if @repo_files.size > MAX_FILES
         | 
| 207 | 
            +
                    @logger.warn "Warning: Found #{@repo_files.size} files, limited to #{MAX_FILES}."
         | 
| 208 | 
            +
                    @repo_files = @repo_files.first(MAX_FILES)
         | 
| 235 209 | 
             
                  end
         | 
| 210 | 
            +
                  @logger.info "Found #{@repo_files.size} files after exclusion filters"
         | 
| 211 | 
            +
                rescue Octokit::Unauthorized
         | 
| 212 | 
            +
                  raise "Authentication error: Invalid or expired GitHub token."
         | 
| 213 | 
            +
                rescue Octokit::NotFound
         | 
| 214 | 
            +
                  raise "Repository not found: '#{@options[:repository]}' or branch '#{@options[:branch]}' doesn't exist or is private."
         | 
| 215 | 
            +
                rescue Octokit::Error => e
         | 
| 216 | 
            +
                  raise "Error accessing repository: #{e.message}"
         | 
| 236 217 | 
             
                end
         | 
| 237 218 |  | 
| 238 219 | 
             
                # Validate repository and branch access
         | 
| 239 220 | 
             
                def validate_repository_access
         | 
| 240 | 
            -
                   | 
| 241 | 
            -
             | 
| 242 | 
            -
             | 
| 243 | 
            -
                   | 
| 244 | 
            -
             | 
| 245 | 
            -
                  rescue Octokit::NotFound
         | 
| 246 | 
            -
                    raise "Repository '#{@options[:repository]}' not found or is private. Check the repository name or provide a valid token."
         | 
| 247 | 
            -
                  end
         | 
| 221 | 
            +
                  repo = @client.repository(@options[:repository])
         | 
| 222 | 
            +
                  @options[:branch] = repo.default_branch if @options[:branch] == :default
         | 
| 223 | 
            +
             | 
| 224 | 
            +
                  # If repository check succeeds, store this fact before trying branch
         | 
| 225 | 
            +
                  @repository_exists = true
         | 
| 248 226 |  | 
| 249 227 | 
             
                  begin
         | 
| 250 228 | 
             
                    @client.branch(@options[:repository], @options[:branch])
         | 
| 251 229 | 
             
                  rescue Octokit::NotFound
         | 
| 230 | 
            +
                    # If we got here, the repository exists but the branch doesn't
         | 
| 252 231 | 
             
                    raise "Branch '#{@options[:branch]}' not found in repository '#{@options[:repository]}'"
         | 
| 253 232 | 
             
                  end
         | 
| 233 | 
            +
                rescue Octokit::Unauthorized
         | 
| 234 | 
            +
                  raise "Authentication error: Invalid or expired GitHub token"
         | 
| 235 | 
            +
                rescue Octokit::NotFound
         | 
| 236 | 
            +
                  # Only reach this for repository not found (branch errors handled separately)
         | 
| 237 | 
            +
                  raise "Repository '#{@options[:repository]}' not found or is private. Check the repository name or provide a valid token."
         | 
| 254 238 | 
             
                end
         | 
| 255 239 |  | 
| 256 | 
            -
                # Optimization: Optimized file exclusion check with combined regex
         | 
| 257 240 | 
             
                def excluded_file?(path)
         | 
| 258 | 
            -
                  path.match?( | 
| 241 | 
            +
                  return true if path.match?(DOT_FILE_PATTERN)
         | 
| 242 | 
            +
                  return true if @default_patterns.any? { |pattern| path.match?(pattern) }
         | 
| 243 | 
            +
                  return true if @custom_patterns.any? { |pattern| path.match?(pattern) }
         | 
| 244 | 
            +
             | 
| 245 | 
            +
                  @glob_patterns_with_char_classes.any? { |glob_pattern| glob_match?(glob_pattern, path) }
         | 
| 246 | 
            +
                end
         | 
| 247 | 
            +
             | 
| 248 | 
            +
                def glob_match?(pattern, string)
         | 
| 249 | 
            +
                  return true if pattern == string
         | 
| 250 | 
            +
                  return false if !pattern.match?(/[*?\[]/) && pattern != string
         | 
| 251 | 
            +
             | 
| 252 | 
            +
                  pattern_idx = 0
         | 
| 253 | 
            +
                  string_idx = 0
         | 
| 254 | 
            +
             | 
| 255 | 
            +
                  while pattern_idx < pattern.length && string_idx < string.length
         | 
| 256 | 
            +
                    case pattern[pattern_idx]
         | 
| 257 | 
            +
                    when "*"
         | 
| 258 | 
            +
                      pattern_idx += 1 while pattern_idx + 1 < pattern.length && pattern[pattern_idx + 1] == "*"
         | 
| 259 | 
            +
                      return true if pattern_idx == pattern.length - 1
         | 
| 260 | 
            +
             | 
| 261 | 
            +
                      next_char = pattern[pattern_idx + 1]
         | 
| 262 | 
            +
                      pattern_idx += 1
         | 
| 263 | 
            +
                      while string_idx < string.length
         | 
| 264 | 
            +
                        break if string[string_idx] == next_char || next_char == "?" ||
         | 
| 265 | 
            +
                                 (next_char == "[" && char_class_match?(pattern, pattern_idx, string[string_idx]))
         | 
| 266 | 
            +
             | 
| 267 | 
            +
                        string_idx += 1
         | 
| 268 | 
            +
                      end
         | 
| 269 | 
            +
                    when "?" then string_idx += 1
         | 
| 270 | 
            +
                                  pattern_idx += 1
         | 
| 271 | 
            +
                    when "["
         | 
| 272 | 
            +
                      return false unless char_class_match?(pattern, pattern_idx, string[string_idx])
         | 
| 273 | 
            +
             | 
| 274 | 
            +
                      pattern_idx += 1
         | 
| 275 | 
            +
                      pattern_idx += 1 while pattern_idx < pattern.length && pattern[pattern_idx] != "]"
         | 
| 276 | 
            +
                      pattern_idx += 1
         | 
| 277 | 
            +
                      string_idx += 1
         | 
| 278 | 
            +
                    when string[string_idx] then string_idx += 1
         | 
| 279 | 
            +
                                                 pattern_idx += 1
         | 
| 280 | 
            +
                    else return false
         | 
| 281 | 
            +
                    end
         | 
| 282 | 
            +
                  end
         | 
| 283 | 
            +
             | 
| 284 | 
            +
                  pattern_idx += 1 while pattern_idx < pattern.length && pattern[pattern_idx] == "*"
         | 
| 285 | 
            +
                  pattern_idx == pattern.length && string_idx == string.length
         | 
| 286 | 
            +
                end
         | 
| 287 | 
            +
             | 
| 288 | 
            +
                def char_class_match?(pattern, class_start_idx, char)
         | 
| 289 | 
            +
                  idx = class_start_idx + 1
         | 
| 290 | 
            +
                  match = false
         | 
| 291 | 
            +
                  negate = pattern[idx] == "^" && (idx += 1)
         | 
| 292 | 
            +
             | 
| 293 | 
            +
                  while idx < pattern.length && pattern[idx] != "]"
         | 
| 294 | 
            +
                    if idx + 2 < pattern.length && pattern[idx + 1] == "-"
         | 
| 295 | 
            +
                      range_start = pattern[idx]
         | 
| 296 | 
            +
                      range_end = pattern[idx + 2]
         | 
| 297 | 
            +
                      match = true if char >= range_start && char <= range_end
         | 
| 298 | 
            +
                      idx += 3
         | 
| 299 | 
            +
                    else
         | 
| 300 | 
            +
                      match = true if pattern[idx] == char
         | 
| 301 | 
            +
                      idx += 1
         | 
| 302 | 
            +
                    end
         | 
| 303 | 
            +
                    break if match
         | 
| 304 | 
            +
                  end
         | 
| 305 | 
            +
                  negate ? !match : match
         | 
| 259 306 | 
             
                end
         | 
| 260 307 |  | 
| 261 | 
            -
                # Common implementation for both file and string output
         | 
| 262 308 | 
             
                def process_content_to_output(output)
         | 
| 263 309 | 
             
                  @logger.debug "Using thread pool with #{@options[:threads]} threads"
         | 
| 264 | 
            -
             | 
| 265 310 | 
             
                  buffer = []
         | 
| 266 311 | 
             
                  progress = ProgressIndicator.new(@repo_files.size, @logger)
         | 
| 267 | 
            -
             | 
| 268 | 
            -
                  # Thread-local buffers to reduce mutex contention
         | 
| 269 312 | 
             
                  thread_buffers = {}
         | 
| 270 313 | 
             
                  mutex = Mutex.new
         | 
| 271 314 | 
             
                  errors = []
         | 
| 272 | 
            -
             | 
| 273 | 
            -
                  # Thread pool based on configuration
         | 
| 274 315 | 
             
                  pool = Concurrent::FixedThreadPool.new(@options[:threads])
         | 
| 275 | 
            -
             | 
| 276 | 
            -
                  # Group files by priority
         | 
| 277 316 | 
             
                  prioritized_files = prioritize_files(@repo_files)
         | 
| 278 317 |  | 
| 279 318 | 
             
                  prioritized_files.each_with_index do |repo_file, index|
         | 
| @@ -281,12 +320,9 @@ module Gitingest | |
| 281 320 | 
             
                      thread_id = Thread.current.object_id
         | 
| 282 321 | 
             
                      thread_buffers[thread_id] ||= []
         | 
| 283 322 | 
             
                      local_buffer = thread_buffers[thread_id]
         | 
| 284 | 
            -
             | 
| 285 323 | 
             
                      begin
         | 
| 286 324 | 
             
                        content = fetch_file_content_with_retry(repo_file.path)
         | 
| 287 | 
            -
                         | 
| 288 | 
            -
                        local_buffer << result
         | 
| 289 | 
            -
             | 
| 325 | 
            +
                        local_buffer << format_file_content(repo_file.path, content)
         | 
| 290 326 | 
             
                        if local_buffer.size >= LOCAL_BUFFER_THRESHOLD
         | 
| 291 327 | 
             
                          mutex.synchronize do
         | 
| 292 328 | 
             
                            buffer.concat(local_buffer)
         | 
| @@ -294,39 +330,24 @@ module Gitingest | |
| 294 330 | 
             
                            local_buffer.clear
         | 
| 295 331 | 
             
                          end
         | 
| 296 332 | 
             
                        end
         | 
| 297 | 
            -
             | 
| 298 333 | 
             
                        progress.update(index + 1)
         | 
| 299 334 | 
             
                      rescue Octokit::Error => e
         | 
| 300 | 
            -
                        mutex.synchronize  | 
| 301 | 
            -
             | 
| 302 | 
            -
                          @logger.error "Error fetching #{repo_file.path}: #{e.message}"
         | 
| 303 | 
            -
                        end
         | 
| 335 | 
            +
                        mutex.synchronize { errors << "Error fetching #{repo_file.path}: #{e.message}" }
         | 
| 336 | 
            +
                        @logger.error "Error fetching #{repo_file.path}: #{e.message}"
         | 
| 304 337 | 
             
                      rescue StandardError => e
         | 
| 305 | 
            -
                        mutex.synchronize  | 
| 306 | 
            -
             | 
| 307 | 
            -
                          @logger.error "Unexpected error processing #{repo_file.path}: #{e.message}"
         | 
| 308 | 
            -
                        end
         | 
| 338 | 
            +
                        mutex.synchronize { errors << "Unexpected error processing #{repo_file.path}: #{e.message}" }
         | 
| 339 | 
            +
                        @logger.error "Unexpected error processing #{repo_file.path}: #{e.message}"
         | 
| 309 340 | 
             
                      end
         | 
| 310 341 | 
             
                    end
         | 
| 311 342 | 
             
                  end
         | 
| 312 343 |  | 
| 313 | 
            -
                   | 
| 314 | 
            -
             | 
| 315 | 
            -
                    wait_success = pool.wait_for_termination(@options[:thread_timeout])
         | 
| 344 | 
            +
                  pool.shutdown
         | 
| 345 | 
            +
                  pool.wait_for_termination(@options[:thread_timeout]) || (@logger.warn "Thread pool timeout, forcing termination"
         | 
| 316 346 |  | 
| 317 | 
            -
             | 
| 318 | 
            -
                      @logger.warn "Thread pool did not shut down within #{@options[:thread_timeout]} seconds, forcing termination"
         | 
| 319 | 
            -
                      pool.kill
         | 
| 320 | 
            -
                    end
         | 
| 321 | 
            -
                  rescue StandardError => e
         | 
| 322 | 
            -
                    @logger.error "Error during thread pool shutdown: #{e.message}"
         | 
| 323 | 
            -
                  end
         | 
| 347 | 
            +
                                                                           pool.kill)
         | 
| 324 348 |  | 
| 325 | 
            -
                  # Process remaining files in thread-local buffers
         | 
| 326 349 | 
             
                  mutex.synchronize do
         | 
| 327 | 
            -
                    thread_buffers.each_value  | 
| 328 | 
            -
                      buffer.concat(local_buffer) unless local_buffer.empty?
         | 
| 329 | 
            -
                    end
         | 
| 350 | 
            +
                    thread_buffers.each_value { |local_buffer| buffer.concat(local_buffer) unless local_buffer.empty? }
         | 
| 330 351 | 
             
                    write_buffer(output, buffer) unless buffer.empty?
         | 
| 331 352 | 
             
                  end
         | 
| 332 353 |  | 
| @@ -336,7 +357,6 @@ module Gitingest | |
| 336 357 | 
             
                  @logger.debug "First few errors: #{errors.first(3).join(", ")}" if @logger.debug?
         | 
| 337 358 | 
             
                end
         | 
| 338 359 |  | 
| 339 | 
            -
                # Format a file's content for the prompt
         | 
| 340 360 | 
             
                def format_file_content(path, content)
         | 
| 341 361 | 
             
                  <<~TEXT
         | 
| 342 362 | 
             
                    ================================================================
         | 
| @@ -347,21 +367,18 @@ module Gitingest | |
| 347 367 | 
             
                  TEXT
         | 
| 348 368 | 
             
                end
         | 
| 349 369 |  | 
| 350 | 
            -
                # Optimization: Fetch file content with exponential backoff for rate limiting
         | 
| 351 370 | 
             
                def fetch_file_content_with_retry(path, retries = 3, base_delay = 2)
         | 
| 352 371 | 
             
                  content = @client.contents(@options[:repository], path: path, ref: @options[:branch])
         | 
| 353 372 | 
             
                  Base64.decode64(content.content)
         | 
| 354 373 | 
             
                rescue Octokit::TooManyRequests
         | 
| 355 374 | 
             
                  raise unless retries.positive?
         | 
| 356 375 |  | 
| 357 | 
            -
                  # Optimization: Exponential backoff with jitter for better rate limit handling
         | 
| 358 376 | 
             
                  delay = base_delay**(4 - retries) * (0.8 + 0.4 * rand)
         | 
| 359 377 | 
             
                  @logger.warn "Rate limit exceeded, waiting #{delay.round(1)} seconds..."
         | 
| 360 378 | 
             
                  sleep(delay)
         | 
| 361 379 | 
             
                  fetch_file_content_with_retry(path, retries - 1, base_delay)
         | 
| 362 380 | 
             
                end
         | 
| 363 381 |  | 
| 364 | 
            -
                # Write buffer contents to file and clear buffer
         | 
| 365 382 | 
             
                def write_buffer(file, buffer)
         | 
| 366 383 | 
             
                  return if buffer.empty?
         | 
| 367 384 |  | 
| @@ -369,26 +386,20 @@ module Gitingest | |
| 369 386 | 
             
                  buffer.clear
         | 
| 370 387 | 
             
                end
         | 
| 371 388 |  | 
| 372 | 
            -
                # Sort files by estimated processing priority
         | 
| 373 389 | 
             
                def prioritize_files(files)
         | 
| 374 | 
            -
                  # Sort files by estimated size (based on extension)
         | 
| 375 | 
            -
                  # This helps with better thread distribution - process small files first
         | 
| 376 390 | 
             
                  files.sort_by do |file|
         | 
| 377 391 | 
             
                    path = file.path.downcase
         | 
| 378 | 
            -
                    if path.end_with?(".md", ".txt", ".json", ".yaml", ".yml")
         | 
| 379 | 
            -
             | 
| 380 | 
            -
                    elsif path.end_with?(".rb", ".py", ".js", ".ts", ".go", ".java", ".c", ".cpp", ".h")
         | 
| 381 | 
            -
                      1  # Then process code files (medium size)
         | 
| 392 | 
            +
                    if path.end_with?(".md", ".txt", ".json", ".yaml", ".yml") then 0
         | 
| 393 | 
            +
                    elsif path.end_with?(".rb", ".py", ".js", ".ts", ".go", ".java", ".c", ".cpp", ".h") then 1
         | 
| 382 394 | 
             
                    else
         | 
| 383 | 
            -
                      2 | 
| 395 | 
            +
                      2
         | 
| 384 396 | 
             
                    end
         | 
| 385 397 | 
             
                  end
         | 
| 386 398 | 
             
                end
         | 
| 387 399 | 
             
              end
         | 
| 388 400 |  | 
| 389 | 
            -
              # Helper class for showing progress in CLI with visual bar
         | 
| 390 401 | 
             
              class ProgressIndicator
         | 
| 391 | 
            -
                BAR_WIDTH = 30 | 
| 402 | 
            +
                BAR_WIDTH = 30
         | 
| 392 403 |  | 
| 393 404 | 
             
                def initialize(total, logger)
         | 
| 394 405 | 
             
                  @total = total
         | 
| @@ -396,77 +407,47 @@ module Gitingest | |
| 396 407 | 
             
                  @last_percent = 0
         | 
| 397 408 | 
             
                  @start_time = Time.now
         | 
| 398 409 | 
             
                  @last_update_time = Time.now
         | 
| 399 | 
            -
                  @update_interval = 0.5 | 
| 410 | 
            +
                  @update_interval = 0.5
         | 
| 400 411 | 
             
                end
         | 
| 401 412 |  | 
| 402 | 
            -
                # Update progress with visual bar
         | 
| 403 413 | 
             
                def update(current)
         | 
| 404 | 
            -
                  # Avoid updating too frequently
         | 
| 405 414 | 
             
                  now = Time.now
         | 
| 406 415 | 
             
                  return if now - @last_update_time < @update_interval && current != @total
         | 
| 407 416 |  | 
| 408 417 | 
             
                  @last_update_time = now
         | 
| 409 418 | 
             
                  percent = (current.to_f / @total * 100).round
         | 
| 410 | 
            -
             | 
| 411 | 
            -
                  # Only update at meaningful increments or completion
         | 
| 412 419 | 
             
                  return unless percent > @last_percent || current == @total
         | 
| 413 420 |  | 
| 414 421 | 
             
                  elapsed = now - @start_time
         | 
| 415 | 
            -
             | 
| 416 | 
            -
                  # Generate progress bar
         | 
| 417 422 | 
             
                  progress_chars = (BAR_WIDTH * (current.to_f / @total)).round
         | 
| 418 423 | 
             
                  bar = "[#{"|" * progress_chars}#{" " * (BAR_WIDTH - progress_chars)}]"
         | 
| 419 | 
            -
             | 
| 420 | 
            -
                  # Calculate ETA
         | 
| 421 | 
            -
                  eta_string = ""
         | 
| 422 | 
            -
                  if current > 1 && percent < 100
         | 
| 423 | 
            -
                    remaining = (elapsed / current) * (@total - current)
         | 
| 424 | 
            -
                    eta_string = " ETA: #{format_time(remaining)}"
         | 
| 425 | 
            -
                  end
         | 
| 426 | 
            -
             | 
| 427 | 
            -
                  # Calculate rate (files per second)
         | 
| 424 | 
            +
                  eta_string = current > 1 && percent < 100 ? " ETA: #{format_time((elapsed / current) * (@total - current))}" : ""
         | 
| 428 425 | 
             
                  rate = begin
         | 
| 429 | 
            -
                    current / elapsed
         | 
| 426 | 
            +
                    (current / elapsed).round(1)
         | 
| 430 427 | 
             
                  rescue StandardError
         | 
| 431 428 | 
             
                    0
         | 
| 432 429 | 
             
                  end
         | 
| 433 | 
            -
                   | 
| 434 | 
            -
             | 
| 435 | 
            -
                  # Clear line and print progress bar
         | 
| 436 | 
            -
                  print "\r\e[K" # Clear the line
         | 
| 437 | 
            -
                  print "#{bar} #{percent}% | #{current}/#{@total} files#{rate_string}#{eta_string}"
         | 
| 438 | 
            -
                  print "\n" if current == @total # Add newline when complete
         | 
| 439 | 
            -
             | 
| 440 | 
            -
                  # Also log to logger at less frequent intervals
         | 
| 430 | 
            +
                  print "\r\e[K#{bar} #{percent}% | #{current}/#{@total} files (#{rate} files/sec)#{eta_string}"
         | 
| 431 | 
            +
                  print "\n" if current == @total
         | 
| 441 432 | 
             
                  if (percent % 10).zero? && percent != @last_percent || current == @total
         | 
| 442 433 | 
             
                    @logger.info "Processing: #{percent}% complete (#{current}/#{@total} files)#{eta_string}"
         | 
| 443 434 | 
             
                  end
         | 
| 444 | 
            -
             | 
| 445 435 | 
             
                  @last_percent = percent
         | 
| 446 436 | 
             
                end
         | 
| 447 437 |  | 
| 448 438 | 
             
                private
         | 
| 449 439 |  | 
| 450 | 
            -
                # Format seconds into a human-readable time string
         | 
| 451 440 | 
             
                def format_time(seconds)
         | 
| 452 441 | 
             
                  return "< 1s" if seconds < 1
         | 
| 453 442 |  | 
| 454 443 | 
             
                  case seconds
         | 
| 455 | 
            -
                  when 0...60
         | 
| 456 | 
            -
             | 
| 457 | 
            -
                   | 
| 458 | 
            -
                    minutes = (seconds / 60).floor
         | 
| 459 | 
            -
                    secs = (seconds % 60).round
         | 
| 460 | 
            -
                    "#{minutes}m #{secs}s"
         | 
| 461 | 
            -
                  else
         | 
| 462 | 
            -
                    hours = (seconds / 3600).floor
         | 
| 463 | 
            -
                    minutes = ((seconds % 3600) / 60).floor
         | 
| 464 | 
            -
                    "#{hours}h #{minutes}m"
         | 
| 444 | 
            +
                  when 0...60 then "#{seconds.round}s"
         | 
| 445 | 
            +
                  when 60...3600 then "#{(seconds / 60).floor}m #{(seconds % 60).round}s"
         | 
| 446 | 
            +
                  else "#{(seconds / 3600).floor}h #{((seconds % 3600) / 60).floor}m"
         | 
| 465 447 | 
             
                  end
         | 
| 466 448 | 
             
                end
         | 
| 467 449 | 
             
              end
         | 
| 468 450 |  | 
| 469 | 
            -
              # Helper class to build directory structure visualization
         | 
| 470 451 | 
             
              class DirectoryStructureBuilder
         | 
| 471 452 | 
             
                def initialize(root_name, files)
         | 
| 472 453 | 
             
                  @root_name = root_name
         | 
| @@ -475,21 +456,17 @@ module Gitingest | |
| 475 456 |  | 
| 476 457 | 
             
                def build
         | 
| 477 458 | 
             
                  tree = { @root_name => {} }
         | 
| 478 | 
            -
             | 
| 479 459 | 
             
                  @files.sort.each do |path|
         | 
| 480 460 | 
             
                    parts = path.split("/")
         | 
| 481 461 | 
             
                    current = tree[@root_name]
         | 
| 482 | 
            -
             | 
| 483 462 | 
             
                    parts.each do |part|
         | 
| 484 | 
            -
                      if part == parts.last
         | 
| 485 | 
            -
                        current[part] = nil
         | 
| 463 | 
            +
                      if part == parts.last then current[part] = nil
         | 
| 486 464 | 
             
                      else
         | 
| 487 465 | 
             
                        current[part] ||= {}
         | 
| 488 466 | 
             
                        current = current[part]
         | 
| 489 467 | 
             
                      end
         | 
| 490 468 | 
             
                    end
         | 
| 491 469 | 
             
                  end
         | 
| 492 | 
            -
             | 
| 493 470 | 
             
                  output = ["Directory structure:"]
         | 
| 494 471 | 
             
                  render_tree(tree, "", output)
         | 
| 495 472 | 
             
                  output.join("\n")
         | 
| @@ -502,18 +479,18 @@ module Gitingest | |
| 502 479 |  | 
| 503 480 | 
             
                  tree.keys.each_with_index do |key, index|
         | 
| 504 481 | 
             
                    is_last = index == tree.keys.size - 1
         | 
| 505 | 
            -
                    current_prefix = prefix
         | 
| 506 | 
            -
             | 
| 507 | 
            -
             | 
| 508 | 
            -
             | 
| 509 | 
            -
             | 
| 510 | 
            -
                     | 
| 511 | 
            -
             | 
| 512 | 
            -
             | 
| 513 | 
            -
             | 
| 514 | 
            -
             | 
| 515 | 
            -
                     | 
| 516 | 
            -
             | 
| 482 | 
            +
                    current_prefix = if prefix.empty?
         | 
| 483 | 
            +
                                       "    "
         | 
| 484 | 
            +
                                     else
         | 
| 485 | 
            +
                                       prefix + (is_last ? "    " : "│   ")
         | 
| 486 | 
            +
                                     end
         | 
| 487 | 
            +
                    connector = if prefix.empty?
         | 
| 488 | 
            +
                                  "└── "
         | 
| 489 | 
            +
                                else
         | 
| 490 | 
            +
                                  (is_last ? "└── " : "├── ")
         | 
| 491 | 
            +
                                end
         | 
| 492 | 
            +
                    item = tree[key].is_a?(Hash) ? "#{key}/" : key
         | 
| 493 | 
            +
                    output << "#{prefix}#{connector}#{item}"
         | 
| 517 494 | 
             
                    render_tree(tree[key], current_prefix, output) if tree[key].is_a?(Hash)
         | 
| 518 495 | 
             
                  end
         | 
| 519 496 | 
             
                end
         | 
    
        data/lib/gitingest/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: gitingest
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.6. | 
| 4 | 
            +
              version: 0.6.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Davide Santangelo
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2025-03- | 
| 11 | 
            +
            date: 2025-03-26 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: concurrent-ruby
         |