llm-docs-builder 0.9.2 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Gemfile.lock +1 -1
- data/lib/llm_docs_builder/config.rb +13 -4
- data/lib/llm_docs_builder/generator.rb +79 -1
- data/lib/llm_docs_builder/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 66983a07a7271c966999350d03fbde1b1080ef0ac05a7209452cbb8720074e1b
|
|
4
|
+
data.tar.gz: 5d7cb81a700db6a43c17145af56ffcd2f6cea4a9a5182d4a2b14fd7772c8ee07
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 416b6f94c7e7dbac3bf3e6ae8793adcf9893d685aec29cc83665c2f9a6ab312bd422542e0074bbeec961f23740698aac6e517ebb7c87f3cb0fef3c8c6067c662
|
|
7
|
+
data.tar.gz: 2fba65092d82dbbeea60ce05317a781ed82f20367313d4eab9a69425e6e51f70ec106ca5f56218bbe255d1485752732de2a876359fde7018e706b948d51053fb
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,40 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.9.4 (2025-10-27)
|
|
4
|
+
- [Feature] **Auto-Exclude Hidden Directories** - Hidden directories (starting with `.`) are now automatically excluded by default to prevent noise from `.git`, `.lint`, `.github`, etc.
|
|
5
|
+
- Adds `include_hidden: false` as default behavior
|
|
6
|
+
- Set `include_hidden: true` in config to include hidden directories if needed
|
|
7
|
+
- Uses `Find.prune` for efficient directory tree traversal
|
|
8
|
+
- Prevents scanning of common directories like `.lint`, `.gh`, `.git`, `node_modules` (if hidden)
|
|
9
|
+
- Fixed bug where root directory `.` was being pruned when used as docs_path
|
|
10
|
+
- [Fix] **Excludes Pattern Matching** - Fixed fnmatch pattern handling for better glob pattern support.
|
|
11
|
+
- `**/.dir/**` patterns now correctly match root-level directories
|
|
12
|
+
- Normalized patterns ending with `/**` to `/**/*` for proper fnmatch behavior
|
|
13
|
+
- Handles `**/` prefix matching for zero-directory cases
|
|
14
|
+
- Fixed relative path calculation to avoid "different prefix" errors
|
|
15
|
+
- [Test] Added unit tests for hidden directory exclusion feature (5 tests)
|
|
16
|
+
- [Test] Added integration tests for hidden directory behavior (3 tests)
|
|
17
|
+
|
|
18
|
+
## 0.9.3 (2025-10-27)
|
|
19
|
+
- [Fix] **Generate Command Excludes Support** - The `generate` command now properly respects the `excludes` configuration option to filter out files from llms.txt generation.
|
|
20
|
+
- Added `should_exclude?` method to Generator class that matches files against glob patterns
|
|
21
|
+
- Supports both simple patterns (e.g., `draft.md`) and glob patterns (e.g., `**/private/**`, `draft-*.md`)
|
|
22
|
+
- Uses `File.fnmatch` with `FNM_PATHNAME` and `FNM_DOTMATCH` flags for proper pattern matching
|
|
23
|
+
- Checks patterns against both absolute and relative paths from docs_path
|
|
24
|
+
- Excludes configuration works consistently with bulk-transform command
|
|
25
|
+
- [Fix] **Token Count from Transformed Content** - Token counts in metadata now accurately reflect the actual content after applying transformations.
|
|
26
|
+
- Token count is now calculated from transformed content when any transformation options are enabled
|
|
27
|
+
- Adds `has_transformations?` helper method to detect if transformations are active
|
|
28
|
+
- Ensures token metadata represents the actual size of processed content, not raw files
|
|
29
|
+
- Falls back to raw content token count when no transformations are enabled
|
|
30
|
+
- [Fix] **Boolean Config Options** - Fixed config merging bug where explicitly setting transformation options to `false` in YAML was being overridden to `true`.
|
|
31
|
+
- Updated `Config#merge_with_options` to properly handle `false` values for boolean options
|
|
32
|
+
- Fixed the `|| true` pattern that was incorrectly replacing explicit `false` config values with `true`
|
|
33
|
+
- Now correctly uses `!self['option'].nil?` check before falling back to defaults
|
|
34
|
+
- Applies to all boolean transformation options: `remove_comments`, `normalize_whitespace`, `remove_badges`, `remove_frontmatter`
|
|
35
|
+
- [Test] Added comprehensive unit tests for excludes functionality in Generator
|
|
36
|
+
- [Test] Added integration tests for generate command with excludes and token counting
|
|
37
|
+
|
|
3
38
|
## 0.9.2 (2025-10-17)
|
|
4
39
|
- [Fix] Tackle one more block boundaries tracking edge-case.
|
|
5
40
|
|
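data/Gemfile.lock
CHANGED
[Gemfile.lock hunk (+1 -1) is missing from this extract]
data/lib/llm_docs_builder/config.rb
CHANGED
|
@@ -69,29 +69,38 @@ module LlmDocsBuilder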
|
|
|
69
69
|
end,
|
|
70
70
|
remove_comments: if options.key?(:remove_comments)
|
|
71
71
|
options[:remove_comments]
|
|
72
|
+
elsif !self['remove_comments'].nil?
|
|
73
|
+
self['remove_comments']
|
|
72
74
|
else
|
|
73
|
-
|
|
75
|
+
true
|
|
74
76
|
end,
|
|
75
77
|
normalize_whitespace: if options.key?(:normalize_whitespace)
|
|
76
78
|
options[:normalize_whitespace]
|
|
79
|
+
elsif !self['normalize_whitespace'].nil?
|
|
80
|
+
self['normalize_whitespace']
|
|
77
81
|
else
|
|
78
|
-
|
|
82
|
+
true
|
|
79
83
|
end,
|
|
80
84
|
remove_badges: if options.key?(:remove_badges)
|
|
81
85
|
options[:remove_badges]
|
|
86
|
+
elsif !self['remove_badges'].nil?
|
|
87
|
+
self['remove_badges']
|
|
82
88
|
else
|
|
83
|
-
|
|
89
|
+
true
|
|
84
90
|
end,
|
|
85
91
|
remove_frontmatter: if options.key?(:remove_frontmatter)
|
|
86
92
|
options[:remove_frontmatter]
|
|
93
|
+
elsif !self['remove_frontmatter'].nil?
|
|
94
|
+
self['remove_frontmatter']
|
|
87
95
|
else
|
|
88
|
-
|
|
96
|
+
true
|
|
89
97
|
end,
|
|
90
98
|
verbose: options.key?(:verbose) ? options[:verbose] : (self['verbose'] || false),
|
|
91
99
|
# Bulk transformation options
|
|
92
100
|
suffix: options[:suffix] || self['suffix'] || '.llm',
|
|
93
101
|
excludes: options[:excludes] || self['excludes'] || [],
|
|
94
102
|
bulk: options.key?(:bulk) ? options[:bulk] : (self['bulk'] || false),
|
|
103
|
+
include_hidden: options.key?(:include_hidden) ? options[:include_hidden] : (self['include_hidden'] || false),
|
|
95
104
|
# New compression options
|
|
96
105
|
remove_code_examples: if options.key?(:remove_code_examples)
|
|
97
106
|
options[:remove_code_examples]
|
|
data/lib/llm_docs_builder/generator.rb
CHANGED
|
@@ -76,9 +76,17 @@ module LlmDocsBuilder
|
|
|
76
76
|
files = []
|
|
77
77
|
|
|
78
78
|
Find.find(docs_path) do |path|
|
|
79
|
+
# Skip hidden directories unless explicitly enabled
|
|
80
|
+
# Don't prune the root docs_path itself (even if it's ".")
|
|
81
|
+
if File.directory?(path) && path != docs_path && File.basename(path).start_with?('.') && !options[:include_hidden]
|
|
82
|
+
Find.prune
|
|
83
|
+
next
|
|
84
|
+
end
|
|
85
|
+
|
|
79
86
|
next unless File.file?(path)
|
|
80
87
|
next unless path.match?(/\.md$/i)
|
|
81
88
|
next if File.basename(path).start_with?('.')
|
|
89
|
+
next if should_exclude?(path)
|
|
82
90
|
|
|
83
91
|
files << analyze_file(path)
|
|
84
92
|
end
|
|
@@ -111,7 +119,12 @@ module LlmDocsBuilder
|
|
|
111
119
|
|
|
112
120
|
# Add optional enhanced metadata
|
|
113
121
|
if options[:include_metadata]
|
|
114
|
-
|
|
122
|
+
# Calculate token count from transformed content if any transformations are enabled
|
|
123
|
+
if options[:include_tokens]
|
|
124
|
+
token_content = has_transformations? ? apply_transformations(content, file_path) : content
|
|
125
|
+
metadata[:tokens] = TokenEstimator.estimate(token_content)
|
|
126
|
+
end
|
|
127
|
+
|
|
115
128
|
metadata[:updated] = File.mtime(file_path).strftime('%Y-%m-%d') if options[:include_timestamps]
|
|
116
129
|
|
|
117
130
|
# Calculate compression ratio if transformation is enabled
|
|
@@ -289,5 +302,70 @@ module LlmDocsBuilder
|
|
|
289
302
|
'priority:low'
|
|
290
303
|
end
|
|
291
304
|
end
|
|
305
|
+
|
|
306
|
+
# Tests if file matches any exclusion pattern from options
|
|
307
|
+
#
|
|
308
|
+
# Uses File.fnmatch with pathname and dotmatch flags.
|
|
309
|
+
# Checks against both absolute path and relative path from docs_path.
|
|
310
|
+
#
|
|
311
|
+
# @param file_path [String] path to check
|
|
312
|
+
# @return [Boolean] true if file should be excluded
|
|
313
|
+
def should_exclude?(file_path)
|
|
314
|
+
excludes = Array(options[:excludes])
|
|
315
|
+
return false if excludes.empty?
|
|
316
|
+
|
|
317
|
+
# Get relative path from docs_path for matching
|
|
318
|
+
relative_path = begin
|
|
319
|
+
if File.directory?(docs_path)
|
|
320
|
+
# Convert both to absolute paths first to avoid "different prefix" error
|
|
321
|
+
abs_file = File.expand_path(file_path)
|
|
322
|
+
abs_docs = File.expand_path(docs_path)
|
|
323
|
+
Pathname.new(abs_file).relative_path_from(Pathname.new(abs_docs)).to_s
|
|
324
|
+
else
|
|
325
|
+
File.basename(file_path)
|
|
326
|
+
end
|
|
327
|
+
rescue ArgumentError
|
|
328
|
+
# If paths can't be made relative (different roots), use basename
|
|
329
|
+
File.basename(file_path)
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
excludes.any? do |pattern|
|
|
333
|
+
# Normalize pattern: ensure /** is followed by something
|
|
334
|
+
# fnmatch requires /** to be followed by at least one component
|
|
335
|
+
normalized_pattern = pattern.end_with?('/**') ? "#{pattern}/*" : pattern
|
|
336
|
+
|
|
337
|
+
# Check both absolute and relative paths
|
|
338
|
+
matches = File.fnmatch(normalized_pattern, file_path, File::FNM_PATHNAME | File::FNM_DOTMATCH) ||
|
|
339
|
+
File.fnmatch(normalized_pattern, relative_path, File::FNM_PATHNAME | File::FNM_DOTMATCH)
|
|
340
|
+
|
|
341
|
+
# If pattern starts with **/, also try without it (for root-level matches)
|
|
342
|
+
# Since **/ in fnmatch doesn't match zero directories
|
|
343
|
+
if !matches && normalized_pattern.start_with?('**/')
|
|
344
|
+
pattern_without_prefix = normalized_pattern.sub(%r{^\*\*/}, '')
|
|
345
|
+
matches = File.fnmatch(pattern_without_prefix, file_path, File::FNM_PATHNAME | File::FNM_DOTMATCH) ||
|
|
346
|
+
File.fnmatch(pattern_without_prefix, relative_path, File::FNM_PATHNAME | File::FNM_DOTMATCH)
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
matches
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
# Checks if any transformation options are enabled
|
|
354
|
+
#
|
|
355
|
+
# @return [Boolean] true if any transformation option is enabled
|
|
356
|
+
def has_transformations?
|
|
357
|
+
[
|
|
358
|
+
:remove_comments,
|
|
359
|
+
:normalize_whitespace,
|
|
360
|
+
:remove_badges,
|
|
361
|
+
:remove_frontmatter,
|
|
362
|
+
:remove_code_examples,
|
|
363
|
+
:remove_images,
|
|
364
|
+
:simplify_links,
|
|
365
|
+
:remove_blockquotes,
|
|
366
|
+
:remove_stopwords,
|
|
367
|
+
:remove_duplicates
|
|
368
|
+
].any? { |opt| options[opt] }
|
|
369
|
+
end
|
|
292
370
|
end
|
|
293
371
|
end
|