openclacky 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,42 @@
3
3
  module Clacky
4
4
  module Tools
5
5
  class Grep < Base
6
+ # Default patterns to ignore when .gitignore is not available
7
+ DEFAULT_IGNORED_PATTERNS = [
8
+ 'node_modules',
9
+ 'vendor/bundle',
10
+ '.git',
11
+ '.svn',
12
+ 'tmp',
13
+ 'log',
14
+ 'coverage',
15
+ 'dist',
16
+ 'build',
17
+ '.bundle',
18
+ '.sass-cache',
19
+ '.DS_Store',
20
+ '*.log'
21
+ ].freeze
22
+
23
+ # Config file patterns that should always be searchable
24
+ CONFIG_FILE_PATTERNS = [
25
+ /\.env/,
26
+ /\.ya?ml$/,
27
+ /\.json$/,
28
+ /\.toml$/,
29
+ /\.ini$/,
30
+ /\.conf$/,
31
+ /\.config$/,
32
+ /config\//,
33
+ /\.config\//
34
+ ].freeze
35
+
36
+ # Maximum file size to search (1MB)
37
+ MAX_FILE_SIZE = 1_048_576
38
+
39
+ # Maximum line length to display (to avoid huge outputs)
40
+ MAX_LINE_LENGTH = 500
41
+
6
42
  self.tool_name = "grep"
7
43
  self.tool_description = "Search file contents using regular expressions. Returns matching lines with context."
8
44
  self.tool_category = "file_system"
@@ -30,53 +66,144 @@ module Clacky
30
66
  },
31
67
  context_lines: {
32
68
  type: "integer",
33
- description: "Number of context lines to show before and after each match",
69
+ description: "Number of context lines to show before and after each match (max: 10)",
34
70
  default: 0
35
71
  },
36
- max_matches: {
72
+ max_files: {
37
73
  type: "integer",
38
74
  description: "Maximum number of matching files to return",
39
75
  default: 50
76
+ },
77
+ max_matches_per_file: {
78
+ type: "integer",
79
+ description: "Maximum number of matches to return per file",
80
+ default: 50
81
+ },
82
+ max_total_matches: {
83
+ type: "integer",
84
+ description: "Maximum total number of matches to return across all files",
85
+ default: 200
86
+ },
87
+ max_file_size: {
88
+ type: "integer",
89
+ description: "Maximum file size in bytes to search (default: 1MB)",
90
+ default: MAX_FILE_SIZE
91
+ },
92
+ max_files_to_search: {
93
+ type: "integer",
94
+ description: "Maximum number of files to search",
95
+ default: 500
40
96
  }
41
97
  },
42
98
  required: %w[pattern]
43
99
  }
44
100
 
45
- def execute(pattern:, path: ".", file_pattern: "**/*", case_insensitive: false, context_lines: 0, max_matches: 50)
101
+ def execute(
102
+ pattern:,
103
+ path: ".",
104
+ file_pattern: "**/*",
105
+ case_insensitive: false,
106
+ context_lines: 0,
107
+ max_files: 50,
108
+ max_matches_per_file: 50,
109
+ max_total_matches: 200,
110
+ max_file_size: MAX_FILE_SIZE,
111
+ max_files_to_search: 500
112
+ )
46
113
  # Validate pattern
47
114
  if pattern.nil? || pattern.strip.empty?
48
115
  return { error: "Pattern cannot be empty" }
49
116
  end
50
117
 
51
- # Validate path
52
- unless File.exist?(path)
118
+ # Validate and expand path
119
+ begin
120
+ expanded_path = File.expand_path(path)
121
+ rescue StandardError => e
122
+ return { error: "Invalid path: #{e.message}" }
123
+ end
124
+
125
+ unless File.exist?(expanded_path)
53
126
  return { error: "Path does not exist: #{path}" }
54
127
  end
55
128
 
129
+ # Limit context_lines
130
+ context_lines = [[context_lines, 0].max, 10].min
131
+
56
132
  begin
57
133
  # Compile regex
58
134
  regex_options = case_insensitive ? Regexp::IGNORECASE : 0
59
135
  regex = Regexp.new(pattern, regex_options)
60
136
 
137
+ # Initialize gitignore parser
138
+ gitignore_path = find_gitignore(expanded_path)
139
+ gitignore = gitignore_path ? GitignoreParser.new(gitignore_path) : nil
140
+
61
141
  results = []
62
142
  total_matches = 0
143
+ files_searched = 0
144
+ skipped = {
145
+ binary: 0,
146
+ too_large: 0,
147
+ ignored: 0
148
+ }
149
+ truncation_reason = nil
63
150
 
64
151
  # Get files to search
65
- files = if File.file?(path)
66
- [path]
152
+ files = if File.file?(expanded_path)
153
+ [expanded_path]
67
154
  else
68
- Dir.glob(File.join(path, file_pattern))
155
+ Dir.glob(File.join(expanded_path, file_pattern))
69
156
  .select { |f| File.file?(f) }
70
- .reject { |f| binary_file?(f) }
71
157
  end
72
158
 
73
159
  # Search each file
74
160
  files.each do |file|
75
- break if results.length >= max_matches
161
+ # Check if we've searched enough files
162
+ if files_searched >= max_files_to_search
163
+ truncation_reason ||= "max_files_to_search limit reached"
164
+ break
165
+ end
166
+
167
+ # Skip if file should be ignored (unless it's a config file)
168
+ if should_ignore_file?(file, expanded_path, gitignore) && !is_config_file?(file)
169
+ skipped[:ignored] += 1
170
+ next
171
+ end
172
+
173
+ # Skip binary files
174
+ if binary_file?(file)
175
+ skipped[:binary] += 1
176
+ next
177
+ end
178
+
179
+ # Skip files that are too large
180
+ if File.size(file) > max_file_size
181
+ skipped[:too_large] += 1
182
+ next
183
+ end
184
+
185
+ files_searched += 1
186
+
187
+ # Check if we've found enough matching files
188
+ if results.length >= max_files
189
+ truncation_reason ||= "max_files limit reached"
190
+ break
191
+ end
192
+
193
+ # Check if we've found enough total matches
194
+ if total_matches >= max_total_matches
195
+ truncation_reason ||= "max_total_matches limit reached"
196
+ break
197
+ end
76
198
 
77
- matches = search_file(file, regex, context_lines)
199
+ # Search the file
200
+ matches = search_file(file, regex, context_lines, max_matches_per_file)
78
201
  next if matches.empty?
79
202
 
203
+ # Add remaining matches respecting max_total_matches
204
+ remaining_matches = max_total_matches - total_matches
205
+ matches = matches.take(remaining_matches) if remaining_matches < matches.length
206
+
80
207
  results << {
81
208
  file: File.expand_path(file),
82
209
  matches: matches
@@ -87,9 +214,11 @@ module Clacky
87
214
  {
88
215
  results: results,
89
216
  total_matches: total_matches,
90
- files_searched: files.length,
217
+ files_searched: files_searched,
91
218
  files_with_matches: results.length,
92
- truncated: results.length >= max_matches,
219
+ skipped_files: skipped,
220
+ truncated: !truncation_reason.nil?,
221
+ truncation_reason: truncation_reason,
93
222
  error: nil
94
223
  }
95
224
  rescue RegexpError => e
@@ -116,36 +245,96 @@ module Clacky
116
245
  else
117
246
  matches = result[:total_matches] || 0
118
247
  files = result[:files_with_matches] || 0
119
- "✓ Found #{matches} matches in #{files} files"
248
+ msg = "✓ Found #{matches} matches in #{files} files"
249
+
250
+ # Add truncation info if present
251
+ if result[:truncated] && result[:truncation_reason]
252
+ msg += " (truncated: #{result[:truncation_reason]})"
253
+ end
254
+
255
+ msg
120
256
  end
121
257
  end
122
258
 
123
259
  private
124
260
 
125
- def search_file(file, regex, context_lines)
126
- matches = []
127
- lines = File.readlines(file, chomp: true)
# Locate the nearest .gitignore for a search path.
#
# Starts in +path+ itself when it is a directory (otherwise in the directory
# that contains it) and walks up through the parent directories.
#
# @param path [String] file or directory the search starts from
# @return [String, nil] path of the closest .gitignore file, or nil when
#   none exists between the start directory and the filesystem root
def find_gitignore(path)
  start_dir = File.directory?(path) ? path : File.dirname(path)
  current = File.expand_path(start_dir)

  loop do
    candidate = File.join(current, '.gitignore')
    # File.file? (not File.exist?) so a directory that merely happens to be
    # called ".gitignore" is never handed to the parser.
    return candidate if File.file?(candidate)

    # File.dirname reaches a fixed point at the top of any filesystem
    # ("/" or a drive root), so the walk always terminates.
    parent = File.dirname(current)
    return nil if parent == current

    current = parent
  end
end
279
+
# Decide whether +file+ should be excluded from the search.
#
# The path is first made relative to +base_path+. When a gitignore object is
# supplied, its +ignored?+ answer is returned as-is; otherwise the entries of
# DEFAULT_IGNORED_PATTERNS are applied:
# * entries containing "*" are globs, tested against the relative path and
#   against the file name alone, so "*.log" also catches "src/debug.log";
# * all other entries match a leading directory, any intermediate directory,
#   the whole relative path, or the file name.
#
# @param file [String] path of the candidate file
# @param base_path [String] search root that +file+ was found under
# @param gitignore [#ignored?, nil] parsed .gitignore rules, if available
# @return [Boolean] result of the rule check (the gitignore object's own
#   return value when one is given)
def should_ignore_file?(file, base_path, gitignore)
  # Compare against "<base>/" so that only real descendants are relativised:
  # a sibling such as "/projX/a.rb" must not be treated as being inside
  # "/proj", and a base of "/" (or one with a trailing slash) must not eat
  # the first character of the remainder.
  base_prefix = "#{base_path.chomp('/')}/"
  relative_path = file.start_with?(base_prefix) ? file[base_prefix.length..] : file
  relative_path = relative_path.delete_prefix('./')

  return gitignore.ignored?(relative_path) if gitignore

  file_name = File.basename(relative_path)
  glob_flags = File::FNM_PATHNAME | File::FNM_DOTMATCH

  DEFAULT_IGNORED_PATTERNS.any? do |pattern|
    if pattern.include?('*')
      # With FNM_PATHNAME "*" never crosses "/", so the file name has to be
      # tested on its own for the glob to apply below the top level.
      File.fnmatch(pattern, relative_path, glob_flags) ||
        File.fnmatch(pattern, file_name, glob_flags)
    else
      relative_path.start_with?("#{pattern}/") ||
        relative_path.include?("/#{pattern}/") ||
        relative_path == pattern ||
        file_name == pattern
    end
  end
end
128
308
 
129
- lines.each_with_index do |line, index|
# Tell whether +file+ matches one of the CONFIG_FILE_PATTERNS regexps.
#
# @param file [String] path to test; the whole string is matched, not just
#   the file name
# @return [Boolean] true when at least one pattern matches
def is_config_file?(file)
  # A single alternation of all patterns matches exactly when any one of
  # them would match on its own.
  any_config_pattern = Regexp.union(CONFIG_FILE_PATTERNS)
  file.match?(any_config_pattern)
end
313
+
314
+ def search_file(file, regex, context_lines, max_matches)
315
+ matches = []
316
+
317
+ # Use File.foreach for memory-efficient line-by-line reading
318
+ File.foreach(file, chomp: true).with_index do |line, index|
319
+ # Stop if we have enough matches for this file
320
+ break if matches.length >= max_matches
321
+
130
322
  next unless line.match?(regex)
131
323
 
132
- # Get context
133
- start_line = [0, index - context_lines].max
134
- end_line = [lines.length - 1, index + context_lines].min
324
+ # Truncate long lines
325
+ display_line = line.length > MAX_LINE_LENGTH ? "#{line[0...MAX_LINE_LENGTH]}..." : line
135
326
 
136
- context = []
137
- (start_line..end_line).each do |i|
138
- context << {
139
- line_number: i + 1,
140
- content: lines[i],
141
- is_match: i == index
142
- }
327
+ # Get context if requested
328
+ if context_lines > 0
329
+ context = get_line_context(file, index, context_lines)
330
+ else
331
+ context = nil
143
332
  end
144
333
 
145
334
  matches << {
146
335
  line_number: index + 1,
147
- line: line,
148
- context: context_lines > 0 ? context : nil
336
+ line: display_line,
337
+ context: context
149
338
  }
150
339
  end
151
340
 
@@ -154,6 +343,32 @@ module Clacky
154
343
  []
155
344
  end
156
345
 
# Collect the lines surrounding a match.
#
# The file is streamed and reading stops as soon as the window has been
# passed, so a match near the top of a big file does not pull the whole file
# into memory.
#
# @param file [String] path of the file containing the match
# @param match_index [Integer] zero-based index of the matching line
# @param context_lines [Integer] number of lines to include on each side
# @return [Array<Hash>, nil] one hash per line in the window with
#   :line_number (1-based), :content (cut to MAX_LINE_LENGTH characters plus
#   "..." when longer) and :is_match; nil when anything goes wrong while
#   reading
def get_line_context(file, match_index, context_lines)
  first_index = [0, match_index - context_lines].max
  last_index = match_index + context_lines
  context = []

  File.foreach(file, chomp: true).with_index do |line_content, i|
    next if i < first_index
    break if i > last_index

    display_content =
      if line_content.length > MAX_LINE_LENGTH
        "#{line_content[0...MAX_LINE_LENGTH]}..."
      else
        line_content
      end

    context << {
      line_number: i + 1,
      content: display_content,
      is_match: i == match_index
    }
  end

  context
rescue StandardError
  # Best effort: context is optional decoration, so a failure here
  # (unreadable file, bad arguments) yields nil.
  nil
end
371
+
157
372
  def binary_file?(file)
158
373
  # Simple heuristic: check if file contains null bytes in first 8KB
159
374
  return false unless File.exist?(file)