llm-docs-builder 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6ddb04b5a0e30d913d2043a79a3d7e14d35bd0166cf7104a300457887b1019cf
4
- data.tar.gz: 5301d904225d0d139a2c2dd8184695eb66e2d858b8f3b5a86026b16a1ccb8c44
3
+ metadata.gz: 75429a83cfd019e7059f76a5fe7df200bff3bc14066c5f83e0739c85a5a68b63
4
+ data.tar.gz: 850d0568023dd1602cad0b48af5e6437b61f34f760c77b99adb565056313d241
5
5
  SHA512:
6
- metadata.gz: 3daacfdde22c93023677e7e0e7487158b3e1f30a9c4e7ec2bf015bc220ff32296334411c74523aaa411d1ef21b1aa47a2e8cb2c7cada797922d89d5690f7ab0a
7
- data.tar.gz: 50a8f8d29f9e79e6f5ab2774111385f6098378491b2732bb84a9d3eee0b2f5be4b6beae196ef76739a6ac1c81562da9ec6f3e66da562e657828874c55ec06ce1
6
+ metadata.gz: f0384e50c10837ec00e4d195115882d4fdaade5e522bba02929b028ac315d686324baf189bfc16803120b77718b28c10f2026e10f2f2bd3dd1ea7aab09f36549
7
+ data.tar.gz: 184e427780364d704067b1f84962fa7c658782ff2e048c991b34c06ca2643029d107872df1a0beac92cfb07076ae72cf9289527d534cad6274a99fdeaef22ac0
data/CHANGELOG.md CHANGED
@@ -1,8 +1,35 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.9.3 (2025-10-27)
4
+ - [Fix] **Generate Command Excludes Support** - The `generate` command now properly respects the `excludes` configuration option to filter out files from llms.txt generation.
5
+ - Added `should_exclude?` method to Generator class that matches files against glob patterns
6
+ - Supports both simple patterns (e.g., `draft.md`) and glob patterns (e.g., `**/private/**`, `draft-*.md`)
7
+ - Uses `File.fnmatch` with `FNM_PATHNAME` and `FNM_DOTMATCH` flags for proper pattern matching
8
+ - Checks patterns against both absolute and relative paths from docs_path
9
+ - Excludes configuration works consistently with bulk-transform command
10
+ - [Fix] **Token Count from Transformed Content** - Token counts in metadata now accurately reflect the actual content after applying transformations.
11
+ - Token count is now calculated from transformed content when any transformation options are enabled
12
+ - Adds `has_transformations?` helper method to detect if transformations are active
13
+ - Ensures token metadata represents the actual size of processed content, not raw files
14
+ - Falls back to raw content token count when no transformations are enabled
15
+ - [Fix] **Boolean Config Options** - Fixed config merging bug where explicitly setting transformation options to `false` in YAML was being overridden to `true`.
16
+ - Updated `Config#merge_with_options` to properly handle `false` values for boolean options
17
+ - Fixed the `|| true` pattern, which overrode explicit `false` config values because `false || true` always evaluates to `true`
18
+ - Now correctly uses `!self['option'].nil?` check before falling back to defaults
19
+ - Applies to all boolean transformation options: `remove_comments`, `normalize_whitespace`, `remove_badges`, `remove_frontmatter`
20
+ - [Test] Added comprehensive unit tests for excludes functionality in Generator
21
+ - [Test] Added integration tests for generate command with excludes and token counting
22
+
23
+ ## 0.9.2 (2025-10-17)
24
+ - [Fix] Handle one more edge case in code block boundary tracking.
25
+
3
26
  ## 0.9.1 (2025-10-17)
4
27
  - [Fix] Fixed HeadingTransformer incorrectly treating hash symbols in code blocks as headings.
5
28
  - Now properly tracks code block boundaries (fenced with ``` or ~~~)
29
+ - Rewrote regex pattern from `/^```|^~~~/` to `/^(```|~~~)/` so the line-start anchor is shared by an explicit group (both forms match the same lines)
30
+ - Skips heading processing for lines inside code blocks
31
+ - Prevents Ruby/Python/Shell comments from being interpreted as markdown headings
32
+ - Added 5 comprehensive test cases covering multiple scenarios to prevent regression
6
33
  - Skips heading processing for lines inside code blocks
7
34
  - Prevents Ruby/Python/Shell comments from being interpreted as markdown headings
8
35
  - Added comprehensive test coverage for code block handling
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- llm-docs-builder (0.9.1)
4
+ llm-docs-builder (0.9.3)
5
5
  zeitwerk (~> 2.6)
6
6
 
7
7
  GEM
@@ -69,23 +69,31 @@ module LlmDocsBuilder
69
69
  end,
70
70
  remove_comments: if options.key?(:remove_comments)
71
71
  options[:remove_comments]
72
+ elsif !self['remove_comments'].nil?
73
+ self['remove_comments']
72
74
  else
73
- self['remove_comments'] || true
75
+ true
74
76
  end,
75
77
  normalize_whitespace: if options.key?(:normalize_whitespace)
76
78
  options[:normalize_whitespace]
79
+ elsif !self['normalize_whitespace'].nil?
80
+ self['normalize_whitespace']
77
81
  else
78
- self['normalize_whitespace'] || true
82
+ true
79
83
  end,
80
84
  remove_badges: if options.key?(:remove_badges)
81
85
  options[:remove_badges]
86
+ elsif !self['remove_badges'].nil?
87
+ self['remove_badges']
82
88
  else
83
- self['remove_badges'] || true
89
+ true
84
90
  end,
85
91
  remove_frontmatter: if options.key?(:remove_frontmatter)
86
92
  options[:remove_frontmatter]
93
+ elsif !self['remove_frontmatter'].nil?
94
+ self['remove_frontmatter']
87
95
  else
88
- self['remove_frontmatter'] || true
96
+ true
89
97
  end,
90
98
  verbose: options.key?(:verbose) ? options[:verbose] : (self['verbose'] || false),
91
99
  # Bulk transformation options
@@ -79,6 +79,7 @@ module LlmDocsBuilder
79
79
  next unless File.file?(path)
80
80
  next unless path.match?(/\.md$/i)
81
81
  next if File.basename(path).start_with?('.')
82
+ next if should_exclude?(path)
82
83
 
83
84
  files << analyze_file(path)
84
85
  end
@@ -111,7 +112,12 @@ module LlmDocsBuilder
111
112
 
112
113
  # Add optional enhanced metadata
113
114
  if options[:include_metadata]
114
- metadata[:tokens] = TokenEstimator.estimate(content) if options[:include_tokens]
115
+ # Calculate token count from transformed content if any transformations are enabled
116
+ if options[:include_tokens]
117
+ token_content = has_transformations? ? apply_transformations(content, file_path) : content
118
+ metadata[:tokens] = TokenEstimator.estimate(token_content)
119
+ end
120
+
115
121
  metadata[:updated] = File.mtime(file_path).strftime('%Y-%m-%d') if options[:include_timestamps]
116
122
 
117
123
  # Calculate compression ratio if transformation is enabled
@@ -289,5 +295,48 @@ module LlmDocsBuilder
289
295
  'priority:low'
290
296
  end
291
297
  end
298
+
299
+ # Tests if file matches any exclusion pattern from options
300
+ #
301
+ # Uses File.fnmatch with pathname and dotmatch flags.
302
+ # Checks against both absolute path and relative path from docs_path.
303
+ #
304
+ # @param file_path [String] path to check
305
+ # @return [Boolean] true if file should be excluded
306
+ def should_exclude?(file_path)
307
+ excludes = Array(options[:excludes])
308
+ return false if excludes.empty?
309
+
310
+ # Get relative path from docs_path for matching
311
+ relative_path = if File.directory?(docs_path)
312
+ Pathname.new(file_path).relative_path_from(Pathname.new(docs_path)).to_s
313
+ else
314
+ File.basename(file_path)
315
+ end
316
+
317
+ excludes.any? do |pattern|
318
+ # Check both absolute and relative paths
319
+ File.fnmatch(pattern, file_path, File::FNM_PATHNAME | File::FNM_DOTMATCH) ||
320
+ File.fnmatch(pattern, relative_path, File::FNM_PATHNAME | File::FNM_DOTMATCH)
321
+ end
322
+ end
323
+
324
+ # Checks if any transformation options are enabled
325
+ #
326
+ # @return [Boolean] true if any transformation option is enabled
327
+ def has_transformations?
328
+ [
329
+ :remove_comments,
330
+ :normalize_whitespace,
331
+ :remove_badges,
332
+ :remove_frontmatter,
333
+ :remove_code_examples,
334
+ :remove_images,
335
+ :simplify_links,
336
+ :remove_blockquotes,
337
+ :remove_stopwords,
338
+ :remove_duplicates
339
+ ].any? { |opt| options[opt] }
340
+ end
292
341
  end
293
342
  end
@@ -42,7 +42,7 @@ module LlmDocsBuilder
42
42
 
43
43
  transformed_lines = lines.map do |line|
44
44
  # Track code block boundaries (fenced code blocks with ``` or ~~~)
45
- if line.match?(/^```|^~~~/)
45
+ if line.match?(/^(```|~~~)/)
46
46
  in_code_block = !in_code_block
47
47
  next line
48
48
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  module LlmDocsBuilder
4
4
  # Current version of the LlmDocsBuilder gem
5
- VERSION = '0.9.1'
5
+ VERSION = '0.9.3'
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llm-docs-builder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1
4
+ version: 0.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld