llm-docs-builder 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/Gemfile.lock +1 -1
- data/lib/llm_docs_builder/config.rb +12 -4
- data/lib/llm_docs_builder/generator.rb +50 -1
- data/lib/llm_docs_builder/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 75429a83cfd019e7059f76a5fe7df200bff3bc14066c5f83e0739c85a5a68b63
|
|
4
|
+
data.tar.gz: 850d0568023dd1602cad0b48af5e6437b61f34f760c77b99adb565056313d241
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f0384e50c10837ec00e4d195115882d4fdaade5e522bba02929b028ac315d686324baf189bfc16803120b77718b28c10f2026e10f2f2bd3dd1ea7aab09f36549
|
|
7
|
+
data.tar.gz: 184e427780364d704067b1f84962fa7c658782ff2e048c991b34c06ca2643029d107872df1a0beac92cfb07076ae72cf9289527d534cad6274a99fdeaef22ac0
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.9.3 (2025-10-27)
|
|
4
|
+
- [Fix] **Generate Command Excludes Support** - The `generate` command now properly respects the `excludes` configuration option to filter out files from llms.txt generation.
|
|
5
|
+
- Added `should_exclude?` method to Generator class that matches files against glob patterns
|
|
6
|
+
- Supports both simple patterns (e.g., `draft.md`) and glob patterns (e.g., `**/private/**`, `draft-*.md`)
|
|
7
|
+
- Uses `File.fnmatch` with `FNM_PATHNAME` and `FNM_DOTMATCH` flags for proper pattern matching
|
|
8
|
+
- Checks patterns against both absolute and relative paths from docs_path
|
|
9
|
+
- Excludes configuration works consistently with bulk-transform command
|
|
10
|
+
- [Fix] **Token Count from Transformed Content** - Token counts in metadata now accurately reflect the actual content after applying transformations.
|
|
11
|
+
- Token count is now calculated from transformed content when any transformation options are enabled
|
|
12
|
+
- Adds `has_transformations?` helper method to detect if transformations are active
|
|
13
|
+
- Ensures token metadata represents the actual size of processed content, not raw files
|
|
14
|
+
- Falls back to raw content token count when no transformations are enabled
|
|
15
|
+
- [Fix] **Boolean Config Options** - Fixed config merging bug where explicitly setting transformation options to `false` in YAML was being overridden to `true`.
|
|
16
|
+
- Updated `Config#merge_with_options` to properly handle `false` values for boolean options
|
|
17
|
+
- Fixed the `|| true` pattern, which overrode explicit `false` config values with `true` (in Ruby, `false || true` evaluates to `true`)
|
|
18
|
+
- Now correctly uses `!self['option'].nil?` check before falling back to defaults
|
|
19
|
+
- Applies to all boolean transformation options: `remove_comments`, `normalize_whitespace`, `remove_badges`, `remove_frontmatter`
|
|
20
|
+
- [Test] Added comprehensive unit tests for excludes functionality in Generator
|
|
21
|
+
- [Test] Added integration tests for generate command with excludes and token counting
|
|
22
|
+
|
|
3
23
|
## 0.9.2 (2025-10-17)
|
|
4
24
|
- [Fix] Tackle one more block boundaries tracking edge-case.
|
|
5
25
|
|
data/Gemfile.lock
CHANGED
|
(hunk not present in this capture)
|
data/lib/llm_docs_builder/config.rb
CHANGED
|
@@ -69,23 +69,31 @@ module LlmDocsBuilder
|
|
|
69
69
|
end,
|
|
70
70
|
remove_comments: if options.key?(:remove_comments)
|
|
71
71
|
options[:remove_comments]
|
|
72
|
+
elsif !self['remove_comments'].nil?
|
|
73
|
+
self['remove_comments']
|
|
72
74
|
else
|
|
73
|
-
|
|
75
|
+
true
|
|
74
76
|
end,
|
|
75
77
|
normalize_whitespace: if options.key?(:normalize_whitespace)
|
|
76
78
|
options[:normalize_whitespace]
|
|
79
|
+
elsif !self['normalize_whitespace'].nil?
|
|
80
|
+
self['normalize_whitespace']
|
|
77
81
|
else
|
|
78
|
-
|
|
82
|
+
true
|
|
79
83
|
end,
|
|
80
84
|
remove_badges: if options.key?(:remove_badges)
|
|
81
85
|
options[:remove_badges]
|
|
86
|
+
elsif !self['remove_badges'].nil?
|
|
87
|
+
self['remove_badges']
|
|
82
88
|
else
|
|
83
|
-
|
|
89
|
+
true
|
|
84
90
|
end,
|
|
85
91
|
remove_frontmatter: if options.key?(:remove_frontmatter)
|
|
86
92
|
options[:remove_frontmatter]
|
|
93
|
+
elsif !self['remove_frontmatter'].nil?
|
|
94
|
+
self['remove_frontmatter']
|
|
87
95
|
else
|
|
88
|
-
|
|
96
|
+
true
|
|
89
97
|
end,
|
|
90
98
|
verbose: options.key?(:verbose) ? options[:verbose] : (self['verbose'] || false),
|
|
91
99
|
# Bulk transformation options
|
|
data/lib/llm_docs_builder/generator.rb
CHANGED
|
@@ -79,6 +79,7 @@ module LlmDocsBuilder
|
|
|
79
79
|
next unless File.file?(path)
|
|
80
80
|
next unless path.match?(/\.md$/i)
|
|
81
81
|
next if File.basename(path).start_with?('.')
|
|
82
|
+
next if should_exclude?(path)
|
|
82
83
|
|
|
83
84
|
files << analyze_file(path)
|
|
84
85
|
end
|
|
@@ -111,7 +112,12 @@ module LlmDocsBuilder
|
|
|
111
112
|
|
|
112
113
|
# Add optional enhanced metadata
|
|
113
114
|
if options[:include_metadata]
|
|
114
|
-
|
|
115
|
+
# Calculate token count from transformed content if any transformations are enabled
|
|
116
|
+
if options[:include_tokens]
|
|
117
|
+
token_content = has_transformations? ? apply_transformations(content, file_path) : content
|
|
118
|
+
metadata[:tokens] = TokenEstimator.estimate(token_content)
|
|
119
|
+
end
|
|
120
|
+
|
|
115
121
|
metadata[:updated] = File.mtime(file_path).strftime('%Y-%m-%d') if options[:include_timestamps]
|
|
116
122
|
|
|
117
123
|
# Calculate compression ratio if transformation is enabled
|
|
@@ -289,5 +295,48 @@ module LlmDocsBuilder
|
|
|
289
295
|
'priority:low'
|
|
290
296
|
end
|
|
291
297
|
end
|
|
298
|
+
|
|
299
|
+
# Tests if file matches any exclusion pattern from options
|
|
300
|
+
#
|
|
301
|
+
# Uses File.fnmatch with pathname and dotmatch flags.
|
|
302
|
+
# Checks against both absolute path and relative path from docs_path.
|
|
303
|
+
#
|
|
304
|
+
# @param file_path [String] path to check
|
|
305
|
+
# @return [Boolean] true if file should be excluded
|
|
306
|
+
def should_exclude?(file_path)
|
|
307
|
+
excludes = Array(options[:excludes])
|
|
308
|
+
return false if excludes.empty?
|
|
309
|
+
|
|
310
|
+
# Get relative path from docs_path for matching
|
|
311
|
+
relative_path = if File.directory?(docs_path)
|
|
312
|
+
Pathname.new(file_path).relative_path_from(Pathname.new(docs_path)).to_s
|
|
313
|
+
else
|
|
314
|
+
File.basename(file_path)
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
excludes.any? do |pattern|
|
|
318
|
+
# Check both absolute and relative paths
|
|
319
|
+
File.fnmatch(pattern, file_path, File::FNM_PATHNAME | File::FNM_DOTMATCH) ||
|
|
320
|
+
File.fnmatch(pattern, relative_path, File::FNM_PATHNAME | File::FNM_DOTMATCH)
|
|
321
|
+
end
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
# Checks if any transformation options are enabled
|
|
325
|
+
#
|
|
326
|
+
# @return [Boolean] true if any transformation option is enabled
|
|
327
|
+
def has_transformations?
|
|
328
|
+
[
|
|
329
|
+
:remove_comments,
|
|
330
|
+
:normalize_whitespace,
|
|
331
|
+
:remove_badges,
|
|
332
|
+
:remove_frontmatter,
|
|
333
|
+
:remove_code_examples,
|
|
334
|
+
:remove_images,
|
|
335
|
+
:simplify_links,
|
|
336
|
+
:remove_blockquotes,
|
|
337
|
+
:remove_stopwords,
|
|
338
|
+
:remove_duplicates
|
|
339
|
+
].any? { |opt| options[opt] }
|
|
340
|
+
end
|
|
292
341
|
end
|
|
293
342
|
end
|