llm-docs-builder 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,9 +63,7 @@ module LlmDocsBuilder
63
63
  content = heading_transformer.transform(content, options)
64
64
  content = compress_content(content) if should_compress?
65
65
  content = enhancement_transformer.transform(content, options)
66
- content = whitespace_transformer.transform(content, options)
67
-
68
- content
66
+ whitespace_transformer.transform(content, options)
69
67
  end
70
68
 
71
69
  private
@@ -114,7 +112,7 @@ module LlmDocsBuilder
114
112
 
115
113
  # Compress content using TextCompressor
116
114
  #
117
- # @param content [String] content to compress
115
+ # @param content [String] markdown content
118
116
  # @return [String] compressed content
119
117
  def compress_content(content)
120
118
  compressor = TextCompressor.new
@@ -129,11 +127,27 @@ module LlmDocsBuilder
129
127
  #
130
128
  # @return [String] markdown content to transform
131
129
  def load_content
132
- if options[:content]
133
- options[:content].dup
134
- else
135
- File.read(file_path)
136
- end
130
+ content = options[:content] ? options[:content].dup : File.read(file_path)
131
+ snippet = html_detector.detection_snippet(content)
132
+
133
+ return content if html_detector.table_fragment?(snippet)
134
+ return html_to_markdown_converter.convert(content) if html_detector.html_content?(content, snippet)
135
+
136
+ content
137
+ end
138
+
139
+ # Memoized HTML to markdown converter
140
+ #
141
+ # @return [HtmlToMarkdownConverter]
142
+ def html_to_markdown_converter
143
+ @html_to_markdown_converter ||= HtmlToMarkdownConverter.new
144
+ end
145
+
146
+ # Memoized HTML detector
147
+ #
148
+ # @return [HtmlDetector]
149
+ def html_detector
150
+ @html_detector ||= HtmlDetector.new
137
151
  end
138
152
  end
139
153
  end
@@ -30,7 +30,7 @@ module LlmDocsBuilder
30
30
 
31
31
  # Format number with comma separators for readability
32
32
  #
33
- # @param number [Integer] number to format
33
+ # @param number [Integer] integer value
34
34
  # @return [String] formatted number with commas
35
35
  #
36
36
  # @example
@@ -1,6 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LlmDocsBuilder
4
+ # Provides content transformation functionality
5
+ #
6
+ # This module contains specialized transformers for modifying markdown content,
7
+ # including cleanup operations, link processing, heading normalization, and
8
+ # content enhancement for AI consumption.
9
+ #
10
+ # @api private
4
11
  module Transformers
5
12
  # Base module for all transformers
6
13
  #
@@ -11,9 +18,12 @@ module LlmDocsBuilder
11
18
  module BaseTransformer
12
19
  # Transform content
13
20
  #
14
- # @param content [String] content to transform
21
+ # @abstract Subclasses must implement this method and document specific options
22
+ # @param content [String] markdown content
15
23
  # @param options [Hash] transformation options
24
+ # @option options [Object] :* options vary by implementation - see specific transformer classes
16
25
  # @return [String] transformed content
26
+ # @note Options vary by implementation - see specific transformer classes for supported keys
17
27
  def transform(content, options = {})
18
28
  raise NotImplementedError, "#{self.class} must implement #transform"
19
29
  end
@@ -21,7 +31,9 @@ module LlmDocsBuilder
21
31
  # Check if transformation should be applied
22
32
  #
23
33
  # @param options [Hash] transformation options
34
+ # @option options [Object] :* options vary by implementation - see specific transformer classes
24
35
  # @return [Boolean] true if transformation should be applied
36
+ # @note Options vary by implementation - see specific transformer classes for supported keys
25
37
  def should_transform?(options)
26
38
  true
27
39
  end
@@ -9,7 +9,10 @@ module LlmDocsBuilder
9
9
  # Provides common functionality needed by multiple commands (transform, compare)
10
10
  # including strict scheme validation, redirect handling and sensible timeouts.
11
11
  class UrlFetcher
12
+ # Default user agent string for HTTP requests
12
13
  DEFAULT_USER_AGENT = 'llm-docs-builder/1.0 (+https://github.com/mensfeld/llm-docs-builder)'
14
+
15
+ # Maximum number of redirects to follow
13
16
  MAX_REDIRECTS = 10
14
17
 
15
18
  # @param user_agent [String] HTTP user agent header value
@@ -71,6 +74,11 @@ module LlmDocsBuilder
71
74
 
72
75
  private
73
76
 
77
+ # Validate and parse URL string
78
+ #
79
+ # @param url_string [String] URL to validate
80
+ # @return [URI::HTTP, URI::HTTPS] parsed URI
81
+ # @raise [Errors::GenerationError] if URL is invalid or unsupported
74
82
  def validate_and_parse_url(url_string)
75
83
  uri = URI.parse(url_string)
76
84
 
@@ -96,6 +104,12 @@ module LlmDocsBuilder
96
104
  )
97
105
  end
98
106
 
107
+ # Convert redirect location to absolute URL
108
+ #
109
+ # @param base_uri [URI] base URI
110
+ # @param location [String] redirect location
111
+ # @return [String] absolute redirect URL
112
+ # @raise [Errors::GenerationError] if location is invalid
99
113
  def absolute_redirect_url(base_uri, location)
100
114
  raise(
101
115
  Errors::GenerationError,
@@ -110,6 +124,10 @@ module LlmDocsBuilder
110
124
  )
111
125
  end
112
126
 
127
+ # Log redirect if verbose mode enabled
128
+ #
129
+ # @param url [String] redirect URL
130
+ # @return [void]
113
131
  def log_redirect(url)
114
132
  return unless @verbose
115
133
 
@@ -2,5 +2,5 @@
2
2
 
3
3
  module LlmDocsBuilder
4
4
  # Current version of the LlmDocsBuilder gem
5
- VERSION = '0.11.0'
5
+ VERSION = '0.12.0'
6
6
  end
@@ -4,10 +4,20 @@ require 'zeitwerk'
4
4
  require 'pathname'
5
5
  require 'find'
6
6
 
7
+ autoload(:Nokogiri, 'nokogiri')
8
+ autoload(:CGI, 'cgi')
9
+
7
10
  loader = Zeitwerk::Loader.for_gem
8
11
  loader.inflector.inflect('cli' => 'CLI')
9
12
  loader.setup
10
13
 
14
+ # Build and optimize documentation for LLMs
15
+ #
16
+ # This gem provides tools for generating llms.txt files and transforming markdown
17
+ # documentation to be AI-friendly. It can reduce token consumption by 67-95% while
18
+ # preserving essential documentation content.
19
+ #
20
+ # @api public
11
21
  module LlmDocsBuilder
12
22
  class << self
13
23
  # Generates llms.txt from existing markdown documentation
@@ -35,6 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.executables = spec.files.grep(%r{\Abin/}) { |f| File.basename(f) }
36
36
  spec.require_paths = ['lib']
37
37
 
38
+ spec.add_dependency 'nokogiri', '~> 1.17'
38
39
  spec.add_dependency 'zeitwerk', '~> 2.6'
39
40
 
40
41
  spec.add_development_dependency 'bundler', '~> 2.0'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llm-docs-builder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -9,6 +9,20 @@ bindir: bin
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: nokogiri
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '1.17'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '1.17'
12
26
  - !ruby/object:Gem::Dependency
13
27
  name: zeitwerk
14
28
  requirement: !ruby/object:Gem::Requirement
@@ -116,7 +130,6 @@ files:
116
130
  - ".rspec"
117
131
  - ".rubocop.yml"
118
132
  - ".ruby-version"
119
- - AGENTS.md
120
133
  - CHANGELOG.md
121
134
  - Dockerfile
122
135
  - Gemfile
@@ -133,6 +146,13 @@ files:
133
146
  - lib/llm_docs_builder/config.rb
134
147
  - lib/llm_docs_builder/errors.rb
135
148
  - lib/llm_docs_builder/generator.rb
149
+ - lib/llm_docs_builder/helpers.rb
150
+ - lib/llm_docs_builder/helpers/prune_trailing_unsafe_link_separator.rb
151
+ - lib/llm_docs_builder/helpers/squeeze_blank_lines_outside_fences.rb
152
+ - lib/llm_docs_builder/html_detector.rb
153
+ - lib/llm_docs_builder/html_to_markdown/figure_code_block_renderer.rb
154
+ - lib/llm_docs_builder/html_to_markdown/table_markup_renderer.rb
155
+ - lib/llm_docs_builder/html_to_markdown_converter.rb
136
156
  - lib/llm_docs_builder/markdown_transformer.rb
137
157
  - lib/llm_docs_builder/output_formatter.rb
138
158
  - lib/llm_docs_builder/parser.rb
data/AGENTS.md DELETED
@@ -1,20 +0,0 @@
1
- # Repository Guidelines
2
-
3
- ## Project Structure & Module Organization
4
- Core gem code lives in `lib/llm_docs_builder`, with single-responsibility modules such as `generator.rb`, `validator.rb`, and the CLI glue in `cli.rb`. Shared entrypoint `lib/llm_docs_builder.rb` wires dependencies. Executables reside in `bin/`: `llm-docs-builder` boots the CLI, while `rspecs` runs the full test matrix. Specs mirror library files under `spec/` with command-level coverage in `spec/integrations`. Static assets (logos, diff screenshots) are in `misc/`. Example configuration templates live at `llm-docs-builder.yml.example`.
5
-
6
- ## Build, Test, and Development Commands
7
- - `bundle install` — sync gem dependencies defined in `Gemfile`.
8
- - `bundle exec rake` — default task; runs RSpec and RuboCop together.
9
- - `bundle exec rspec` or `bin/rspecs` — execute unit and integration specs with doc formatter.
10
- - `bundle exec rubocop` — enforce the Ruby style guide; mirrors CI.
11
- - `bin/llm-docs-builder transform --docs README.md` — smoke-test the CLI against a local file.
12
-
13
- ## Coding Style & Naming Conventions
14
- Target Ruby 3.2 with two-space indentation and trailing newline. Prefer single-quoted strings; enable `# frozen_string_literal: true` headers on Ruby files. Keep lines ≤120 characters except where the RuboCop config allows. Use descriptive module/class names (e.g., `LlmDocsBuilder::Generator`) and predicate methods ending with `?` when returning booleans. Place supporting fixtures in `spec/support` if added, and name files after the class they extend.
15
-
16
- ## Testing Guidelines
17
- RSpec is the sole testing framework. Name files `*_spec.rb` and align describe blocks with constant paths. Integration scenarios belong in `spec/integrations` to capture CLI behaviors. SimpleCov is enabled by default for line and branch coverage; export `SIMPLECOV=false` for quick local runs. Persist example statuses with the automatically managed `spec/examples.txt`.
18
-
19
- ## Commit & Pull Request Guidelines
20
- Keep commit subjects short, present-tense, and focused (e.g., `Align CLI config (#27)`). Group related changes together so `git log` remains readable. Pull requests should describe motivation, summarize behavioral impact, link related issues or discussions, and include CLI output or screenshots when touching generated docs. Ensure CI passes (`bundle exec rake`) before requesting review, and note any follow-up work in the PR description.