gitingest 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/README.md +62 -17
- data/bin/gitingest +37 -7
- data/lib/gitingest/generator.rb +107 -39
- data/lib/gitingest/version.rb +1 -1
- metadata +15 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd7a1e5d5ced0b5449fa30671b0d9a536685d37c3d0d34d33437f652df24c199
|
4
|
+
data.tar.gz: c49a7c6489f7074e3870a05b5d1e47b0ea3e6b7a6eadce405db3c300d8165434
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64b73ea01bc836a500c82a260c41be6f87e6f0c72bf868bee059407eea45466ad4892d905b7b69f5c6151e40e92d2553c8b8f678bfe1155fef968486648ae871
|
7
|
+
data.tar.gz: 7c18261e6fdb279916d2f8bff4557de76eb6ec0c039861d646f5f56dadacbef536b176d2a48fb25d142b70e30e868fb431f9cde6aad69262ee6f8dc37232ea0a
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,27 @@
|
|
2
2
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
4
4
|
|
5
|
+
## [0.3.0] - 2025-03-02
|
6
|
+
- Added `faraday-retry` gem dependency for better API rate limit handling
|
7
|
+
- Implemented thread-safe buffer management with mutex locks
|
8
|
+
- Added new `ProgressIndicator` class for better CLI progress reporting (showing percentages)
|
9
|
+
- Improved memory efficiency with configurable buffer size
|
10
|
+
- Enhanced code organization with dedicated methods for file content formatting
|
11
|
+
- Added comprehensive method documentation and parameter descriptions
|
12
|
+
- Optimized thread pool size calculation for better performance
|
13
|
+
- Improved error handling in concurrent operations
|
14
|
+
|
15
|
+
## [0.2.0] - 2025-03-02
|
16
|
+
- Added support for quiet and verbose modes in the command-line interface
|
17
|
+
- Added the ability to specify a custom output file for the prompt
|
18
|
+
- Enhanced error handling with logging support
|
19
|
+
- Added logging functionality with custom loggers
|
20
|
+
- Introduced rate limit handling with retries for file fetching
|
21
|
+
- Added repository branch support
|
22
|
+
- Exclude specific file patterns via command-line arguments
|
23
|
+
- Enforced a 1000 file limit to prevent memory overload
|
24
|
+
- Updated version to 0.2.0
|
25
|
+
|
5
26
|
## [0.1.0] - 2025-03-02
|
6
27
|
|
7
28
|
### Added
|
@@ -15,4 +36,4 @@ All notable changes to this project will be documented in this file.
|
|
15
36
|
- Automatic rate limit handling with retry mechanism
|
16
37
|
- Repository prompt generation with file separation markers
|
17
38
|
- Support for custom branch selection
|
18
|
-
- Custom output file naming options
|
39
|
+
- Custom output file naming options
|
data/README.md
CHANGED
@@ -24,7 +24,26 @@ bundle exec rake install
|
|
24
24
|
### Command Line
|
25
25
|
|
26
26
|
```bash
|
27
|
-
|
27
|
+
# Basic usage (public repository)
|
28
|
+
gitingest --repository user/repo
|
29
|
+
|
30
|
+
# With GitHub token for private repositories
|
31
|
+
gitingest --repository user/repo --token YOUR_GITHUB_TOKEN
|
32
|
+
|
33
|
+
# Specify a custom output file
|
34
|
+
gitingest --repository user/repo --output my_prompt.txt
|
35
|
+
|
36
|
+
# Specify a different branch
|
37
|
+
gitingest --repository user/repo --branch develop
|
38
|
+
|
39
|
+
# Exclude additional patterns
|
40
|
+
gitingest --repository user/repo --exclude "*.md,docs/"
|
41
|
+
|
42
|
+
# Quiet mode
|
43
|
+
gitingest --repository user/repo --quiet
|
44
|
+
|
45
|
+
# Verbose mode
|
46
|
+
gitingest --repository user/repo --verbose
|
28
47
|
```
|
29
48
|
|
30
49
|
#### Available Options
|
@@ -39,17 +58,32 @@ gitingest --repository username/repo --token YOUR_GITHUB_TOKEN --output output.t
|
|
39
58
|
### As a Library
|
40
59
|
|
41
60
|
```ruby
|
42
|
-
require
|
61
|
+
require "gitingest"
|
43
62
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
}
|
63
|
+
# Basic usage
|
64
|
+
generator = Gitingest::Generator.new(
|
65
|
+
repository: "user/repo",
|
66
|
+
token: "YOUR_GITHUB_TOKEN" # optional
|
67
|
+
)
|
68
|
+
generator.run
|
51
69
|
|
52
|
-
|
70
|
+
# With custom options
|
71
|
+
generator = Gitingest::Generator.new(
|
72
|
+
repository: "user/repo",
|
73
|
+
token: "YOUR_GITHUB_TOKEN",
|
74
|
+
output_file: "my_prompt.txt",
|
75
|
+
branch: "develop",
|
76
|
+
exclude: ["*.md", "docs/"],
|
77
|
+
quiet: true # or verbose: true
|
78
|
+
)
|
79
|
+
generator.run
|
80
|
+
|
81
|
+
# With custom logger
|
82
|
+
custom_logger = Logger.new("gitingest.log")
|
83
|
+
generator = Gitingest::Generator.new(
|
84
|
+
repository: "user/repo",
|
85
|
+
logger: custom_logger
|
86
|
+
)
|
53
87
|
generator.run
|
54
88
|
```
|
55
89
|
|
@@ -64,19 +98,30 @@ generator.run
|
|
64
98
|
|
65
99
|
## Default Exclusion Patterns
|
66
100
|
|
67
|
-
By default,
|
101
|
+
By default, the generator excludes files and directories commonly ignored in repositories, such as:
|
102
|
+
|
103
|
+
- Version control files (`.git/`, `.svn/`)
|
104
|
+
- System files (`.DS_Store`, `Thumbs.db`)
|
105
|
+
- Log files (`*.log`, `*.bak`)
|
106
|
+
- Images and media files (`*.png`, `*.jpg`, `*.mp3`)
|
107
|
+
- Archives (`*.zip`, `*.tar.gz`)
|
108
|
+
- Dependency directories (`node_modules/`, `vendor/`)
|
109
|
+
- Compiled and binary files (`*.pyc`, `*.class`, `*.exe`)
|
68
110
|
|
69
|
-
|
70
|
-
|
71
|
-
-
|
72
|
-
-
|
73
|
-
-
|
74
|
-
- Dependency directories: `node_modules/`, `vendor/`
|
111
|
+
## Limitations
|
112
|
+
|
113
|
+
- To prevent memory overload, only the first 1000 files will be processed
|
114
|
+
- API requests are subject to GitHub limits (60 requests/hour without token, 5000 requests/hour with token)
|
115
|
+
- Private repositories require a GitHub personal access token
|
75
116
|
|
76
117
|
## Contributing
|
77
118
|
|
78
119
|
Bug reports and pull requests are welcome on GitHub at https://github.com/davidesantangelo/gitingest.
|
79
120
|
|
121
|
+
## Acknowledgements
|
122
|
+
|
123
|
+
Inspired by [`cyclotruc/gitingest`](https://github.com/cyclotruc/gitingest).
|
124
|
+
|
80
125
|
## License
|
81
126
|
|
82
127
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/bin/gitingest
CHANGED
@@ -6,15 +6,38 @@ require "gitingest"
|
|
6
6
|
|
7
7
|
options = {}
|
8
8
|
parser = OptionParser.new do |opts|
|
9
|
-
opts.banner = "Usage: gitingest [
|
9
|
+
opts.banner = "Usage: gitingest [OPTIONS]"
|
10
|
+
opts.separator ""
|
11
|
+
opts.separator "Options:"
|
12
|
+
|
13
|
+
opts.on("-r", "--repository REPO", "GitHub repository (username/repo) [Required]") do |repo|
|
14
|
+
options[:repository] = repo
|
15
|
+
end
|
16
|
+
|
17
|
+
opts.on("-t", "--token TOKEN", "GitHub personal access token") do |token|
|
18
|
+
options[:token] = token
|
19
|
+
end
|
20
|
+
|
21
|
+
opts.on("-o", "--output FILE", "Output file for the prompt") do |file|
|
22
|
+
options[:output_file] = file
|
23
|
+
end
|
24
|
+
|
25
|
+
opts.on("-b", "--branch BRANCH", "Repository branch") do |branch|
|
26
|
+
options[:branch] = branch
|
27
|
+
end
|
10
28
|
|
11
|
-
opts.on("-r", "--repository REPO", "GitHub repository (username/repo)") { |repo| options[:repository] = repo }
|
12
|
-
opts.on("-t", "--token TOKEN", "GitHub personal access token") { |token| options[:token] = token }
|
13
|
-
opts.on("-o", "--output FILE", "Output file for the prompt") { |file| options[:output_file] = file }
|
14
29
|
opts.on("-e", "--exclude PATTERN", "File patterns to exclude (comma separated)") do |pattern|
|
15
|
-
options[:exclude] = pattern.split(",")
|
30
|
+
options[:exclude] = pattern.split(",")
|
16
31
|
end
|
17
|
-
|
32
|
+
|
33
|
+
opts.on("-q", "--quiet", "Disable all output except errors") do
|
34
|
+
options[:quiet] = true
|
35
|
+
end
|
36
|
+
|
37
|
+
opts.on("-v", "--verbose", "Enable verbose output") do
|
38
|
+
options[:verbose] = true
|
39
|
+
end
|
40
|
+
|
18
41
|
opts.on("-h", "--help", "Show this help message") do
|
19
42
|
puts opts
|
20
43
|
exit
|
@@ -23,9 +46,16 @@ end
|
|
23
46
|
|
24
47
|
begin
|
25
48
|
parser.parse!
|
49
|
+
|
50
|
+
if options[:repository].nil?
|
51
|
+
puts "Error: Repository option is required"
|
52
|
+
puts parser
|
53
|
+
exit 1
|
54
|
+
end
|
55
|
+
|
26
56
|
generator = Gitingest::Generator.new(options)
|
27
57
|
generator.run
|
28
|
-
rescue
|
58
|
+
rescue OptionParser::InvalidOption => e
|
29
59
|
puts "Error: #{e.message}"
|
30
60
|
puts parser
|
31
61
|
exit 1
|
data/lib/gitingest/generator.rb
CHANGED
@@ -4,6 +4,7 @@ require "octokit"
|
|
4
4
|
require "base64"
|
5
5
|
require "fileutils"
|
6
6
|
require "concurrent"
|
7
|
+
require "logger"
|
7
8
|
|
8
9
|
module Gitingest
|
9
10
|
class Generator
|
@@ -66,19 +67,54 @@ module Gitingest
|
|
66
67
|
|
67
68
|
# Maximum number of files to process to prevent memory overload
|
68
69
|
MAX_FILES = 1000
|
69
|
-
|
70
|
-
|
71
|
-
|
70
|
+
BUFFER_SIZE = 100 # Write every 100 files to reduce I/O operations
|
71
|
+
|
72
|
+
attr_reader :options, :client, :repo_files, :excluded_patterns, :logger
|
73
|
+
|
74
|
+
# Initialize a new Generator with the given options
|
75
|
+
#
|
76
|
+
# @param options [Hash] Configuration options
|
77
|
+
# @option options [String] :repository GitHub repository in format "username/repo"
|
78
|
+
# @option options [String] :token GitHub personal access token
|
79
|
+
# @option options [String] :branch Repository branch (default: "main")
|
80
|
+
# @option options [String] :output_file Output file path
|
81
|
+
# @option options [Array<String>] :exclude Additional patterns to exclude
|
82
|
+
# @option options [Boolean] :quiet Reduce logging to errors only
|
83
|
+
# @option options [Boolean] :verbose Increase logging verbosity
|
84
|
+
# @option options [Logger] :logger Custom logger instance
|
72
85
|
def initialize(options = {})
|
73
86
|
@options = options
|
74
87
|
@repo_files = []
|
75
88
|
@excluded_patterns = []
|
89
|
+
setup_logger
|
76
90
|
validate_options
|
77
91
|
configure_client
|
78
92
|
compile_excluded_patterns
|
79
93
|
end
|
80
94
|
|
81
|
-
|
95
|
+
# Main execution method
|
96
|
+
def run
|
97
|
+
fetch_repository_contents
|
98
|
+
generate_prompt
|
99
|
+
end
|
100
|
+
|
101
|
+
private
|
102
|
+
|
103
|
+
# Set up logging based on verbosity options
|
104
|
+
def setup_logger
|
105
|
+
@logger = @options[:logger] || Logger.new($stdout)
|
106
|
+
@logger.level = if @options[:quiet]
|
107
|
+
Logger::ERROR
|
108
|
+
elsif @options[:verbose]
|
109
|
+
Logger::DEBUG
|
110
|
+
else
|
111
|
+
Logger::INFO
|
112
|
+
end
|
113
|
+
# Simplify logger format for command line usage
|
114
|
+
@logger.formatter = proc { |severity, _, _, msg| "#{severity == "INFO" ? "" : "[#{severity}] "}#{msg}\n" }
|
115
|
+
end
|
116
|
+
|
117
|
+
# Validate and set default options
|
82
118
|
def validate_options
|
83
119
|
raise ArgumentError, "Repository is required" unless @options[:repository]
|
84
120
|
|
@@ -88,37 +124,36 @@ module Gitingest
|
|
88
124
|
@excluded_patterns = DEFAULT_EXCLUDES + @options[:exclude]
|
89
125
|
end
|
90
126
|
|
91
|
-
|
127
|
+
# Configure the GitHub API client
|
92
128
|
def configure_client
|
93
129
|
@client = @options[:token] ? Octokit::Client.new(access_token: @options[:token]) : Octokit::Client.new
|
94
130
|
|
95
131
|
if @options[:token]
|
96
|
-
|
132
|
+
@logger.info "Using provided GitHub token for authentication"
|
97
133
|
else
|
98
|
-
|
99
|
-
|
134
|
+
@logger.warn "Warning: No token provided. API rate limits will be restricted and private repositories will be inaccessible."
|
135
|
+
@logger.warn "For better results, provide a GitHub token with the --token option."
|
100
136
|
end
|
101
137
|
end
|
102
138
|
|
139
|
+
# Convert exclusion patterns to regular expressions
|
103
140
|
def compile_excluded_patterns
|
104
141
|
@excluded_patterns = @excluded_patterns.map { |pattern| Regexp.new(pattern) }
|
105
142
|
end
|
106
143
|
|
107
|
-
|
144
|
+
# Fetch repository contents and apply exclusion filters
|
108
145
|
def fetch_repository_contents
|
109
|
-
|
146
|
+
@logger.info "Fetching repository: #{@options[:repository]} (branch: #{@options[:branch]})"
|
110
147
|
begin
|
111
|
-
# First validate authentication and repository access
|
112
148
|
validate_repository_access
|
113
|
-
|
114
149
|
repo_tree = @client.tree(@options[:repository], @options[:branch], recursive: true)
|
115
150
|
@repo_files = repo_tree.tree.select { |item| item.type == "blob" && !excluded_file?(item.path) }
|
116
151
|
|
117
152
|
if @repo_files.size > MAX_FILES
|
118
|
-
|
153
|
+
@logger.warn "Warning: Found #{@repo_files.size} files, limited to #{MAX_FILES}."
|
119
154
|
@repo_files = @repo_files.first(MAX_FILES)
|
120
155
|
end
|
121
|
-
|
156
|
+
@logger.info "Found #{@repo_files.size} files after exclusion filters"
|
122
157
|
rescue Octokit::Unauthorized
|
123
158
|
raise "Authentication error: Invalid or expired GitHub token. Please provide a valid token."
|
124
159
|
rescue Octokit::NotFound
|
@@ -128,8 +163,8 @@ module Gitingest
|
|
128
163
|
end
|
129
164
|
end
|
130
165
|
|
166
|
+
# Validate repository and branch access
|
131
167
|
def validate_repository_access
|
132
|
-
# Check if we can access the repository
|
133
168
|
begin
|
134
169
|
@client.repository(@options[:repository])
|
135
170
|
rescue Octokit::Unauthorized
|
@@ -138,7 +173,6 @@ module Gitingest
|
|
138
173
|
raise "Repository '#{@options[:repository]}' not found or is private. Check the repository name or provide a valid token."
|
139
174
|
end
|
140
175
|
|
141
|
-
# Check if the branch exists
|
142
176
|
begin
|
143
177
|
@client.branch(@options[:repository], @options[:branch])
|
144
178
|
rescue Octokit::NotFound
|
@@ -146,47 +180,64 @@ module Gitingest
|
|
146
180
|
end
|
147
181
|
end
|
148
182
|
|
183
|
+
# Check if a file should be excluded based on its path
|
149
184
|
def excluded_file?(path)
|
150
185
|
return true if path.start_with?(".") || path.split("/").any? { |part| part.start_with?(".") }
|
151
186
|
|
152
187
|
@excluded_patterns.any? { |pattern| path.match?(pattern) }
|
153
188
|
end
|
154
189
|
|
155
|
-
|
190
|
+
# Generate the consolidated prompt file
|
156
191
|
def generate_prompt
|
157
|
-
|
158
|
-
Concurrent::Array.new(@repo_files)
|
192
|
+
@logger.info "Generating prompt..."
|
159
193
|
buffer = []
|
160
|
-
|
194
|
+
progress = ProgressIndicator.new(@repo_files.size, @logger)
|
161
195
|
|
162
196
|
# Dynamic thread pool based on core count
|
163
|
-
pool = Concurrent::FixedThreadPool.new([Concurrent.processor_count, 5].
|
197
|
+
pool = Concurrent::FixedThreadPool.new([Concurrent.processor_count, 5].min)
|
164
198
|
|
165
199
|
File.open(@options[:output_file], "w") do |file|
|
166
200
|
@repo_files.each_with_index do |repo_file, index|
|
167
201
|
pool.post do
|
168
202
|
content = fetch_file_content_with_retry(repo_file.path)
|
169
|
-
result =
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
print "\rProcessing: #{index + 1}/#{@repo_files.size} files"
|
203
|
+
result = format_file_content(repo_file.path, content)
|
204
|
+
|
205
|
+
# Thread-safe buffer management
|
206
|
+
buffer_mutex.synchronize do
|
207
|
+
buffer << result
|
208
|
+
write_buffer(file, buffer) if buffer.size >= BUFFER_SIZE
|
209
|
+
end
|
210
|
+
|
211
|
+
progress.update(index + 1)
|
179
212
|
rescue Octokit::Error => e
|
180
|
-
|
213
|
+
@logger.error "Error fetching #{repo_file.path}: #{e.message}"
|
181
214
|
end
|
182
215
|
end
|
216
|
+
|
183
217
|
pool.shutdown
|
184
218
|
pool.wait_for_termination
|
185
|
-
|
219
|
+
|
220
|
+
# Write any remaining files in buffer
|
221
|
+
buffer_mutex.synchronize do
|
222
|
+
write_buffer(file, buffer) unless buffer.empty?
|
223
|
+
end
|
186
224
|
end
|
187
|
-
|
225
|
+
|
226
|
+
@logger.info "Prompt generated and saved to #{@options[:output_file]}"
|
188
227
|
end
|
189
228
|
|
229
|
+
# Format a file's content for the prompt
|
230
|
+
def format_file_content(path, content)
|
231
|
+
<<~TEXT
|
232
|
+
================================================================
|
233
|
+
File: #{path}
|
234
|
+
================================================================
|
235
|
+
#{content}
|
236
|
+
|
237
|
+
TEXT
|
238
|
+
end
|
239
|
+
|
240
|
+
# Fetch file content with retry logic for rate limiting
|
190
241
|
def fetch_file_content_with_retry(path, retries = 3)
|
191
242
|
content = @client.contents(@options[:repository], path: path, ref: @options[:branch])
|
192
243
|
Base64.decode64(content.content)
|
@@ -194,20 +245,37 @@ module Gitingest
|
|
194
245
|
raise unless retries.positive?
|
195
246
|
|
196
247
|
sleep_time = 60 / retries
|
197
|
-
|
248
|
+
@logger.warn "Rate limit exceeded, waiting #{sleep_time} seconds..."
|
198
249
|
sleep(sleep_time)
|
199
250
|
fetch_file_content_with_retry(path, retries - 1)
|
200
251
|
end
|
201
252
|
|
253
|
+
# Write buffer contents to file and clear buffer
|
202
254
|
def write_buffer(file, buffer)
|
203
255
|
file.puts(buffer.join)
|
204
256
|
buffer.clear
|
205
257
|
end
|
206
258
|
|
207
|
-
|
208
|
-
def
|
209
|
-
|
210
|
-
|
259
|
+
# Thread-safe mutex for buffer operations
|
260
|
+
def buffer_mutex
|
261
|
+
@buffer_mutex ||= Mutex.new
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
265
|
+
# Helper class for showing progress in CLI
|
266
|
+
class ProgressIndicator
|
267
|
+
def initialize(total, logger)
|
268
|
+
@total = total
|
269
|
+
@logger = logger
|
270
|
+
@last_percent = 0
|
271
|
+
end
|
272
|
+
|
273
|
+
def update(current)
|
274
|
+
percent = (current.to_f / @total * 100).round
|
275
|
+
return unless percent > @last_percent && ((percent % 5).zero? || current == @total)
|
276
|
+
|
277
|
+
@logger.info "Processing: #{percent}% complete (#{current}/#{@total} files)"
|
278
|
+
@last_percent = percent
|
211
279
|
end
|
212
280
|
end
|
213
281
|
end
|
data/lib/gitingest/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gitingest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Davide Santangelo
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faraday-retry
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: octokit
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|