trifle-docs 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -6
- data/README.md +78 -31
- data/lib/trifle/docs/app.rb +24 -0
- data/lib/trifle/docs/engine.rb +27 -1
- data/lib/trifle/docs/harvester/markdown.rb +20 -1
- data/lib/trifle/docs/harvester.rb +284 -2
- data/lib/trifle/docs/helper/ai_detection.rb +30 -0
- data/lib/trifle/docs/helper/markdown_layout.rb +51 -0
- data/lib/trifle/docs/operations/raw_content.rb +24 -0
- data/lib/trifle/docs/operations/search.rb +25 -0
- data/lib/trifle/docs/version.rb +1 -1
- data/lib/trifle/docs.rb +16 -0
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b52d0fa673ad7999994caf2fda33169ac1815587cbf3023cce7a3a5ec65373a2
|
|
4
|
+
data.tar.gz: 56c8c894a50aef901915dee9eab80a2f7b7e5a6fe3b46cbe344f4360f0c639b1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 59e6d106be4c79000a4470429308ce833ba8a6e0e3d02d8b1bdfacafd638030e14687b53aa5f5d81f55b91158b8873d9eb624bc0665d04d3bb781afc31927c8d
|
|
7
|
+
data.tar.gz: d7ca56cd6809bd75891a5bf6b8fd49847007539dced62b5c69b43d9c779e00d963c3986fba84ff457d68edd52947cc5a554333584325ac56bb42e2f95d4e5579
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
trifle-docs (0.
|
|
4
|
+
trifle-docs (0.5.0)
|
|
5
5
|
redcarpet
|
|
6
6
|
rouge
|
|
7
7
|
sinatra
|
|
@@ -11,18 +11,18 @@ GEM
|
|
|
11
11
|
remote: https://rubygems.org/
|
|
12
12
|
specs:
|
|
13
13
|
ast (2.4.2)
|
|
14
|
-
base64 (0.
|
|
14
|
+
base64 (0.3.0)
|
|
15
15
|
byebug (11.1.3)
|
|
16
16
|
diff-lcs (1.5.0)
|
|
17
|
-
mustermann (3.0.
|
|
17
|
+
mustermann (3.0.3)
|
|
18
18
|
ruby2_keywords (~> 0.0.1)
|
|
19
|
-
nio4r (2.
|
|
19
|
+
nio4r (2.7.4)
|
|
20
20
|
parallel (1.22.1)
|
|
21
21
|
parser (3.1.2.0)
|
|
22
22
|
ast (~> 2.4.1)
|
|
23
|
-
puma (
|
|
23
|
+
puma (6.6.0)
|
|
24
24
|
nio4r (~> 2.0)
|
|
25
|
-
rack (3.
|
|
25
|
+
rack (3.1.16)
|
|
26
26
|
rack-protection (4.0.0)
|
|
27
27
|
base64 (>= 0.1.0)
|
|
28
28
|
rack (>= 3.0.0, < 4)
|
|
@@ -72,6 +72,7 @@ GEM
|
|
|
72
72
|
|
|
73
73
|
PLATFORMS
|
|
74
74
|
arm64-darwin-21
|
|
75
|
+
arm64-darwin-24
|
|
75
76
|
x86_64-linux
|
|
76
77
|
|
|
77
78
|
DEPENDENCIES
|
data/README.md
CHANGED
|
@@ -1,53 +1,79 @@
|
|
|
1
1
|
# Trifle::Docs
|
|
2
2
|
|
|
3
|
-
[](https://
|
|
4
|
-

|
|
5
|
-
[](https://gitpod.io/#https://github.com/trifle-io/trifle-docs)
|
|
3
|
+
[](https://rubygems.org/gems/trifle-docs)
|
|
4
|
+
[](https://github.com/trifle-io/trifle-docs)
|
|
6
5
|
|
|
7
|
-
Simple
|
|
6
|
+
Simple router for your static documentation. Like markdown, or textile, or whatever files. It maps your docs folder structure into URLs and renders them within the simplest template possible.
|
|
8
7
|
|
|
9
|
-
|
|
8
|
+
## Documentation
|
|
10
9
|
|
|
11
|
-
|
|
10
|
+
For comprehensive guides, API reference, and examples, visit [trifle.io/trifle-docs](https://trifle.io/trifle-docs)
|
|
12
11
|
|
|
12
|
+

|
|
13
13
|
|
|
14
|
-
##
|
|
14
|
+
## Installation
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
Add this line to your application's Gemfile:
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
```ruby
|
|
19
|
+
gem 'trifle-docs'
|
|
20
|
+
```
|
|
19
21
|
|
|
20
|
-
|
|
22
|
+
And then execute:
|
|
21
23
|
|
|
22
|
-
```
|
|
23
|
-
$ bundle
|
|
24
|
+
```bash
|
|
25
|
+
$ bundle install
|
|
24
26
|
```
|
|
25
27
|
|
|
26
|
-
|
|
28
|
+
Or install it yourself as:
|
|
27
29
|
|
|
28
|
-
```
|
|
30
|
+
```bash
|
|
29
31
|
$ gem install trifle-docs
|
|
30
32
|
```
|
|
31
33
|
|
|
32
|
-
##
|
|
34
|
+
## Quick Start
|
|
33
35
|
|
|
34
|
-
|
|
36
|
+
### 1. Configure
|
|
35
37
|
|
|
36
38
|
```ruby
|
|
37
|
-
# app.rb
|
|
38
39
|
require 'trifle/docs'
|
|
39
40
|
|
|
40
41
|
Trifle::Docs.configure do |config|
|
|
41
|
-
config.path = 'docs'
|
|
42
|
-
config.
|
|
42
|
+
config.path = File.join(__dir__, 'docs')
|
|
43
|
+
config.views = File.join(__dir__, 'templates')
|
|
43
44
|
config.register_harvester(Trifle::Docs::Harvester::Markdown)
|
|
44
45
|
config.register_harvester(Trifle::Docs::Harvester::File)
|
|
45
46
|
end
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### 2. Create documentation structure
|
|
46
50
|
|
|
47
|
-
Trifle::Docs.App.run!
|
|
48
51
|
```
|
|
52
|
+
docs/
|
|
53
|
+
├── index.md
|
|
54
|
+
├── getting-started/
|
|
55
|
+
│ ├── index.md
|
|
56
|
+
│ └── installation.md
|
|
57
|
+
└── api/
|
|
58
|
+
├── index.md
|
|
59
|
+
└── reference.md
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### 3. Use in your application
|
|
49
63
|
|
|
50
|
-
|
|
64
|
+
```ruby
|
|
65
|
+
# As Rack middleware
|
|
66
|
+
use Trifle::Docs::Middleware
|
|
67
|
+
|
|
68
|
+
# Or mount in Rails
|
|
69
|
+
Rails.application.routes.draw do
|
|
70
|
+
mount Trifle::Docs::Engine => '/docs'
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Or Sinatra app
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 4. Templates
|
|
51
77
|
|
|
52
78
|
Please create two files in folder you provided the configuration.
|
|
53
79
|
|
|
@@ -67,7 +93,7 @@ Please create two files in folder you provided the configuration.
|
|
|
67
93
|
<%= content %>
|
|
68
94
|
```
|
|
69
95
|
|
|
70
|
-
|
|
96
|
+
#### Template variables
|
|
71
97
|
|
|
72
98
|
There are several variables available in your template file (except `layout.erb`).
|
|
73
99
|
- `sitemap` - complete sitemap tree of the folder.
|
|
@@ -75,22 +101,43 @@ There are several variables available in your template file (except `layout.erb`
|
|
|
75
101
|
- `content` - rendered markdown file.
|
|
76
102
|
- `meta` - metadata from markdown file.
|
|
77
103
|
|
|
78
|
-
##
|
|
104
|
+
## Features
|
|
105
|
+
|
|
106
|
+
- **File-based routing** - Maps folder structure to URL paths
|
|
107
|
+
- **Multiple harvesters** - Markdown, textile, and custom file processors
|
|
108
|
+
- **Template system** - ERB templates with layout support
|
|
109
|
+
- **Flexible integration** - Works with Rack, Rails, Sinatra
|
|
110
|
+
- **Caching support** - Optional caching for production environments
|
|
111
|
+
- **Navigation helpers** - Automatic menu and breadcrumb generation
|
|
112
|
+
|
|
113
|
+
## Harvesters
|
|
114
|
+
|
|
115
|
+
Trifle::Docs supports multiple content processors:
|
|
116
|
+
|
|
117
|
+
- **Markdown** - Process `.md` files with frontmatter support
|
|
118
|
+
- **File** - Handle static assets and non-markdown content
|
|
119
|
+
- **Custom** - Build your own harvesters for specialized content
|
|
79
120
|
|
|
80
|
-
|
|
121
|
+
## Testing
|
|
81
122
|
|
|
82
|
-
|
|
123
|
+
Tests focus on documenting behavior and ensuring reliability. To run the test suite:
|
|
83
124
|
|
|
84
|
-
|
|
125
|
+
```bash
|
|
126
|
+
$ bundle exec rspec
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Tests are meant to be **simple and isolated**. Every test should be **independent** and able to run in any order. Tests should be **self-contained** and set up their own configuration.
|
|
130
|
+
|
|
131
|
+
Use **single layer testing** to focus on testing a specific class or module in isolation. Use **appropriate stubbing** for file system operations when testing harvesters and routing logic.
|
|
132
|
+
|
|
133
|
+
**Repeat yourself** in test setup for clarity rather than complex shared setups that can hide dependencies.
|
|
134
|
+
|
|
135
|
+
Tests verify that file system changes are properly reflected in the documentation routing and that templates render correctly with provided content.
|
|
85
136
|
|
|
86
137
|
## Contributing
|
|
87
138
|
|
|
88
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/trifle-io/trifle-docs.
|
|
139
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/trifle-io/trifle-docs.
|
|
89
140
|
|
|
90
141
|
## License
|
|
91
142
|
|
|
92
143
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
|
93
|
-
|
|
94
|
-
## Code of Conduct
|
|
95
|
-
|
|
96
|
-
Everyone interacting in the Trifle::Docs project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/trifle-io/trifle-docs/blob/master/CODE_OF_CONDUCT.md).
|
data/lib/trifle/docs/app.rb
CHANGED
|
@@ -10,11 +10,35 @@ module Trifle
|
|
|
10
10
|
set :views, proc { Trifle::Docs.default.views }
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
+
get '/search' do
|
|
14
|
+
results = Trifle::Docs.search(query: params['query'], scope: params['scope'])
|
|
15
|
+
erb(
|
|
16
|
+
'search'.to_sym,
|
|
17
|
+
{},
|
|
18
|
+
{
|
|
19
|
+
results: results,
|
|
20
|
+
query: params['query'],
|
|
21
|
+
scope: params['scope'],
|
|
22
|
+
sitemap: Trifle::Docs.sitemap,
|
|
23
|
+
meta: { description: 'Search' }
|
|
24
|
+
}
|
|
25
|
+
)
|
|
26
|
+
end
|
|
27
|
+
|
|
13
28
|
get '/*' do
|
|
14
29
|
url = params['splat'].first.chomp('/')
|
|
15
30
|
meta = Trifle::Docs.meta(url: url)
|
|
16
31
|
halt(404, 'Not Found') if meta.nil?
|
|
17
32
|
|
|
33
|
+
if Trifle::Docs::Helper::AiDetection.ai_scraper?(request.user_agent) && meta['type'] != 'file'
|
|
34
|
+
content_type 'text/markdown'
|
|
35
|
+
return Trifle::Docs::Helper::MarkdownLayout.render(
|
|
36
|
+
meta: meta,
|
|
37
|
+
raw_content: Trifle::Docs.raw_content(url: url),
|
|
38
|
+
sitemap: Trifle::Docs.sitemap
|
|
39
|
+
)
|
|
40
|
+
end
|
|
41
|
+
|
|
18
42
|
if meta['type'] == 'file'
|
|
19
43
|
send_file meta['path']
|
|
20
44
|
else
|
data/lib/trifle/docs/engine.rb
CHANGED
|
@@ -17,6 +17,7 @@ if Object.const_defined?('Rails')
|
|
|
17
17
|
def self.draw
|
|
18
18
|
Trifle::Docs::Engine.routes.draw do
|
|
19
19
|
root to: 'page#show'
|
|
20
|
+
get 'search', to: 'page#search'
|
|
20
21
|
get '*url', to: 'page#show'
|
|
21
22
|
end
|
|
22
23
|
end
|
|
@@ -33,15 +34,32 @@ if Object.const_defined?('Rails')
|
|
|
33
34
|
"layouts/trifle/docs/#{configuration.layout}"
|
|
34
35
|
end
|
|
35
36
|
|
|
36
|
-
def show
|
|
37
|
+
def show # rubocop:disable Metrics/AbcSize
|
|
37
38
|
url = [params[:url], params[:format]].compact.join('.')
|
|
38
39
|
meta = Trifle::Docs.meta(url: url, config: configuration)
|
|
39
40
|
render_not_found and return if meta.nil?
|
|
41
|
+
|
|
42
|
+
if Trifle::Docs::Helper::AiDetection.ai_scraper?(request.user_agent) && meta['type'] != 'file'
|
|
43
|
+
render_markdown(url: url, meta: meta)
|
|
44
|
+
return
|
|
45
|
+
end
|
|
40
46
|
render_file(meta: meta) and return if meta['type'] == 'file'
|
|
41
47
|
|
|
42
48
|
render_content(url: url, meta: meta)
|
|
43
49
|
end
|
|
44
50
|
|
|
51
|
+
def search
|
|
52
|
+
results = Trifle::Docs.search(query: params[:query], scope: params[:scope])
|
|
53
|
+
|
|
54
|
+
render 'search', locals: {
|
|
55
|
+
results: results,
|
|
56
|
+
query: params[:query],
|
|
57
|
+
scope: params[:scope],
|
|
58
|
+
sitemap: Trifle::Docs.sitemap,
|
|
59
|
+
meta: { description: 'Search' }
|
|
60
|
+
}
|
|
61
|
+
end
|
|
62
|
+
|
|
45
63
|
def render_not_found
|
|
46
64
|
render text: 'Not Found', status: 404
|
|
47
65
|
end
|
|
@@ -59,6 +77,14 @@ if Object.const_defined?('Rails')
|
|
|
59
77
|
url: url
|
|
60
78
|
}
|
|
61
79
|
end
|
|
80
|
+
|
|
81
|
+
def render_markdown
|
|
82
|
+
render plain: Trifle::Docs::Helper::MarkdownLayout.render(
|
|
83
|
+
meta: meta,
|
|
84
|
+
raw_content: Trifle::Docs.raw_content(url: url, config: configuration),
|
|
85
|
+
sitemap: Trifle::Docs.sitemap(config: configuration)
|
|
86
|
+
), content_type: 'text/markdown'
|
|
87
|
+
end
|
|
62
88
|
end
|
|
63
89
|
end
|
|
64
90
|
end
|
|
@@ -33,12 +33,21 @@ module Trifle
|
|
|
33
33
|
Render.new(with_toc_data: true),
|
|
34
34
|
fenced_code_blocks: true,
|
|
35
35
|
disable_indented_code_blocks: true,
|
|
36
|
-
footnotes: true
|
|
36
|
+
footnotes: true,
|
|
37
|
+
tables: true
|
|
37
38
|
).render(data.sub(/^---(.*?)---(\s*)/m, ''))
|
|
38
39
|
rescue StandardError => e
|
|
39
40
|
puts "Markdown: Failed to parse CONTENT for #{file}: #{e}"
|
|
40
41
|
end
|
|
41
42
|
|
|
43
|
+
def raw_content
|
|
44
|
+
@raw_content = nil unless cache
|
|
45
|
+
|
|
46
|
+
@raw_content ||= data.sub(/^---(.*?)---(\s*)/m, '').strip
|
|
47
|
+
rescue StandardError => e
|
|
48
|
+
puts "Markdown: Failed to load RAW CONTENT for #{file}: #{e}"
|
|
49
|
+
end
|
|
50
|
+
|
|
42
51
|
def default_meta
|
|
43
52
|
{
|
|
44
53
|
'url' => "/#{[namespace, url].compact.join('/')}",
|
|
@@ -67,6 +76,16 @@ module Trifle
|
|
|
67
76
|
rescue StandardError => e
|
|
68
77
|
puts "Markdown: Failed to parse TOC for #{file}: #{e}"
|
|
69
78
|
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
def preload_cache
|
|
83
|
+
data
|
|
84
|
+
content
|
|
85
|
+
raw_content
|
|
86
|
+
meta
|
|
87
|
+
toc
|
|
88
|
+
end
|
|
70
89
|
end
|
|
71
90
|
end
|
|
72
91
|
end
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module Trifle
|
|
4
4
|
module Docs
|
|
5
5
|
module Harvester
|
|
6
|
-
class Walker
|
|
6
|
+
class Walker # rubocop:disable Metrics/ClassLength
|
|
7
7
|
attr_reader :path, :router, :namespace, :cache
|
|
8
8
|
|
|
9
9
|
def initialize(**keywords)
|
|
@@ -41,6 +41,32 @@ module Trifle
|
|
|
41
41
|
end
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
+
def search_for(query:, scope: nil, limit: 10) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
|
45
|
+
return [] if query.nil? || query.strip.empty?
|
|
46
|
+
|
|
47
|
+
query_terms = [query.downcase.strip]
|
|
48
|
+
matches = []
|
|
49
|
+
|
|
50
|
+
searchable_routes = filter_searchable_routes(scope)
|
|
51
|
+
|
|
52
|
+
searchable_routes.each do |url, conveyor|
|
|
53
|
+
score = calculate_fuzzy_match_score(conveyor, query_terms)
|
|
54
|
+
next if score.zero?
|
|
55
|
+
|
|
56
|
+
matches << {
|
|
57
|
+
url: url,
|
|
58
|
+
conveyor: conveyor,
|
|
59
|
+
score: score
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
break if matches.size >= limit * 2
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
matches.sort_by { |match| -match[:score] }
|
|
66
|
+
.first(limit)
|
|
67
|
+
.map { |match| build_search_result(match, query_terms) }
|
|
68
|
+
end
|
|
69
|
+
|
|
44
70
|
def collection_for(url:)
|
|
45
71
|
return sitemap if url.empty?
|
|
46
72
|
|
|
@@ -51,6 +77,17 @@ module Trifle
|
|
|
51
77
|
route_for(url: url)&.content
|
|
52
78
|
end
|
|
53
79
|
|
|
80
|
+
def raw_content_for(url:)
|
|
81
|
+
conveyor = route_for(url: url)
|
|
82
|
+
return unless conveyor
|
|
83
|
+
|
|
84
|
+
if conveyor.respond_to?(:raw_content)
|
|
85
|
+
conveyor.raw_content
|
|
86
|
+
elsif conveyor.respond_to?(:content)
|
|
87
|
+
conveyor.content
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
54
91
|
def meta_for(url:)
|
|
55
92
|
route_for(url: url)&.meta
|
|
56
93
|
end
|
|
@@ -62,6 +99,245 @@ module Trifle
|
|
|
62
99
|
def not_found(url:)
|
|
63
100
|
puts "No route found for url: #{url}"
|
|
64
101
|
end
|
|
102
|
+
|
|
103
|
+
private
|
|
104
|
+
|
|
105
|
+
def filter_searchable_routes(scope)
|
|
106
|
+
router.select do |url, conveyor|
|
|
107
|
+
# Only include searchable harvesters (exclude File harvester)
|
|
108
|
+
searchable_conveyor?(conveyor) && (scope.nil? || url.start_with?(scope))
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def searchable_conveyor?(conveyor)
|
|
113
|
+
# Include only conveyors that have searchable content (exclude File harvester)
|
|
114
|
+
conveyor.respond_to?(:content) && conveyor.respond_to?(:meta) &&
|
|
115
|
+
!conveyor.class.name.include?('File')
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def calculate_fuzzy_match_score(conveyor, query_terms) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
119
|
+
score = 0
|
|
120
|
+
searchable_content = extract_searchable_content(conveyor)
|
|
121
|
+
query = query_terms.first
|
|
122
|
+
|
|
123
|
+
# Fuzzy matching with different strategies
|
|
124
|
+
searchable_content.each do |field, content|
|
|
125
|
+
next if content.nil? || content.empty?
|
|
126
|
+
|
|
127
|
+
field_weight = get_field_weight(field)
|
|
128
|
+
|
|
129
|
+
# Exact match (highest score)
|
|
130
|
+
exact_matches = content.scan(/#{Regexp.escape(query)}/i).size
|
|
131
|
+
score += exact_matches * field_weight * 10
|
|
132
|
+
|
|
133
|
+
# Subsequence match (fzf-like)
|
|
134
|
+
score += field_weight * 5 if subsequence_match?(content, query)
|
|
135
|
+
|
|
136
|
+
# N-gram similarity
|
|
137
|
+
ngram_score = calculate_ngram_similarity(content, query)
|
|
138
|
+
score += (ngram_score * field_weight * 3).to_i
|
|
139
|
+
|
|
140
|
+
# Word boundary matches
|
|
141
|
+
word_matches = content.downcase.scan(/\b#{Regexp.escape(query)}\b/i).size
|
|
142
|
+
score += word_matches * field_weight * 8
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
score
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
def calculate_match_score(conveyor, query_terms) # rubocop:disable Metrics/AbcSize
|
|
149
|
+
score = 0
|
|
150
|
+
|
|
151
|
+
searchable_content = extract_searchable_content(conveyor)
|
|
152
|
+
|
|
153
|
+
query_terms.each do |term|
|
|
154
|
+
score += search_in_field(searchable_content[:title], term, weight: 10)
|
|
155
|
+
score += search_in_field(searchable_content[:url], term, weight: 8)
|
|
156
|
+
score += search_in_field(searchable_content[:tags], term, weight: 7)
|
|
157
|
+
score += search_in_field(searchable_content[:content], term, weight: 1)
|
|
158
|
+
score += search_in_field(searchable_content[:meta], term, weight: 5)
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
score
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
def extract_searchable_content(conveyor) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
|
165
|
+
content = {}
|
|
166
|
+
|
|
167
|
+
content[:url] = conveyor.url.downcase
|
|
168
|
+
|
|
169
|
+
if conveyor.respond_to?(:content) && conveyor.cache
|
|
170
|
+
content[:content] = strip_html(conveyor.content || '').downcase
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
if conveyor.respond_to?(:meta) && conveyor.cache
|
|
174
|
+
meta = conveyor.meta || {}
|
|
175
|
+
content[:title] = (meta['title'] || '').downcase
|
|
176
|
+
content[:meta] = meta.values.join(' ').downcase
|
|
177
|
+
|
|
178
|
+
# Include tags as searchable content
|
|
179
|
+
tags = extract_tags(conveyor)
|
|
180
|
+
content[:tags] = tags.join(' ').downcase
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
content
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
def search_in_field(field_content, term, weight:)
|
|
187
|
+
return 0 if field_content.nil? || field_content.empty?
|
|
188
|
+
|
|
189
|
+
occurrences = field_content.scan(/#{Regexp.escape(term)}/i).size
|
|
190
|
+
occurrences * weight
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
def build_search_result(match, query_terms) # rubocop:disable Metrics/MethodLength
|
|
194
|
+
conveyor = match[:conveyor]
|
|
195
|
+
title = extract_title(conveyor)
|
|
196
|
+
excerpt = generate_excerpt(conveyor, query_terms)
|
|
197
|
+
tags = extract_tags(conveyor)
|
|
198
|
+
|
|
199
|
+
{
|
|
200
|
+
url: match[:url],
|
|
201
|
+
title: title,
|
|
202
|
+
excerpt: excerpt,
|
|
203
|
+
tags: tags,
|
|
204
|
+
score: match[:score]
|
|
205
|
+
}
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def extract_title(conveyor) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
209
|
+
if conveyor.respond_to?(:meta) && conveyor.cache
|
|
210
|
+
meta = conveyor.meta || {}
|
|
211
|
+
title = meta['title']
|
|
212
|
+
return title unless title.nil? || title.strip.empty?
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
conveyor.url.split('/').last&.gsub(/[-_]/, ' ')&.split&.map(&:capitalize)&.join(' ') || 'Untitled'
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def extract_tags(conveyor) # rubocop:disable Metrics/MethodLength
|
|
219
|
+
return [] unless conveyor.respond_to?(:meta) && conveyor.cache
|
|
220
|
+
|
|
221
|
+
meta = conveyor.meta || {}
|
|
222
|
+
tags = meta['tags']
|
|
223
|
+
|
|
224
|
+
case tags
|
|
225
|
+
when Array
|
|
226
|
+
tags.compact.map(&:to_s)
|
|
227
|
+
when String
|
|
228
|
+
[tags]
|
|
229
|
+
else
|
|
230
|
+
[]
|
|
231
|
+
end
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
def generate_excerpt(conveyor, query_terms, max_length: 200)
|
|
235
|
+
return nil unless conveyor.respond_to?(:content) && conveyor.cache
|
|
236
|
+
|
|
237
|
+
content = strip_html(conveyor.content || '')
|
|
238
|
+
return nil if content.empty?
|
|
239
|
+
|
|
240
|
+
best_excerpt = find_best_excerpt(content, query_terms, max_length)
|
|
241
|
+
best_excerpt || content[0, max_length] + (content.length > max_length ? '...' : '')
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
def find_best_excerpt(content, query_terms, max_length) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
|
245
|
+
return nil if query_terms.empty?
|
|
246
|
+
|
|
247
|
+
first_match_pos = nil
|
|
248
|
+
matched_term = nil
|
|
249
|
+
|
|
250
|
+
query_terms.each do |term|
|
|
251
|
+
pos = content.downcase.index(term.downcase)
|
|
252
|
+
if pos && (first_match_pos.nil? || pos < first_match_pos)
|
|
253
|
+
first_match_pos = pos
|
|
254
|
+
matched_term = term
|
|
255
|
+
end
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
return nil unless first_match_pos
|
|
259
|
+
|
|
260
|
+
context_size = (max_length - matched_term.length) / 2
|
|
261
|
+
start_pos = [first_match_pos - context_size, 0].max
|
|
262
|
+
end_pos = [start_pos + max_length, content.length].min
|
|
263
|
+
|
|
264
|
+
start_pos = find_word_boundary(content, start_pos, :backward)
|
|
265
|
+
end_pos = find_word_boundary(content, end_pos, :forward)
|
|
266
|
+
|
|
267
|
+
excerpt = content[start_pos...end_pos]
|
|
268
|
+
|
|
269
|
+
excerpt = "...#{excerpt}" if start_pos.positive?
|
|
270
|
+
excerpt += '...' if end_pos < content.length
|
|
271
|
+
|
|
272
|
+
excerpt
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
def find_word_boundary(content, pos, direction) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
|
276
|
+
return pos if pos <= 0 || pos >= content.length
|
|
277
|
+
|
|
278
|
+
if direction == :backward
|
|
279
|
+
pos -= 1 while pos.positive? && !content[pos].match?(/\s/)
|
|
280
|
+
pos += 1 if content[pos].match?(/\s/)
|
|
281
|
+
else # :forward
|
|
282
|
+
pos += 1 while pos < content.length && !content[pos].match?(/\s/)
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
pos
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
def get_field_weight(field)
|
|
289
|
+
case field
|
|
290
|
+
when :title then 10
|
|
291
|
+
when :url then 8
|
|
292
|
+
when :tags then 7
|
|
293
|
+
when :meta then 5
|
|
294
|
+
when :content then 1
|
|
295
|
+
else 1
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
def subsequence_match?(text, pattern)
|
|
300
|
+
# fzf-like subsequence matching
|
|
301
|
+
text_idx = 0
|
|
302
|
+
pattern_idx = 0
|
|
303
|
+
|
|
304
|
+
while text_idx < text.length && pattern_idx < pattern.length
|
|
305
|
+
pattern_idx += 1 if text[text_idx].downcase == pattern[pattern_idx].downcase
|
|
306
|
+
text_idx += 1
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
pattern_idx == pattern.length
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
def calculate_ngram_similarity(text, pattern, n: 2) # rubocop:disable Naming/MethodParameterName
|
|
313
|
+
return 0 if text.length < n || pattern.length < n
|
|
314
|
+
|
|
315
|
+
text_ngrams = get_ngrams(text.downcase, n)
|
|
316
|
+
pattern_ngrams = get_ngrams(pattern.downcase, n)
|
|
317
|
+
|
|
318
|
+
return 0 if text_ngrams.empty? || pattern_ngrams.empty?
|
|
319
|
+
|
|
320
|
+
intersection = text_ngrams & pattern_ngrams
|
|
321
|
+
union = text_ngrams | pattern_ngrams
|
|
322
|
+
|
|
323
|
+
intersection.length.to_f / union.length
|
|
324
|
+
end
|
|
325
|
+
|
|
326
|
+
def get_ngrams(text, n) # rubocop:disable Naming/MethodParameterName
|
|
327
|
+
return [] if text.length < n
|
|
328
|
+
|
|
329
|
+
(0..text.length - n).map do |i|
|
|
330
|
+
text[i, n]
|
|
331
|
+
end
|
|
332
|
+
end
|
|
333
|
+
|
|
334
|
+
def strip_html(html)
|
|
335
|
+
return '' if html.nil?
|
|
336
|
+
|
|
337
|
+
# Ensure valid UTF-8 encoding
|
|
338
|
+
html = html.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
|
339
|
+
html.gsub(/<[^>]*>/, ' ').gsub(/\s+/, ' ').strip
|
|
340
|
+
end
|
|
65
341
|
end
|
|
66
342
|
|
|
67
343
|
class Sieve
|
|
@@ -89,12 +365,18 @@ module Trifle
|
|
|
89
365
|
@url = url
|
|
90
366
|
@namespace = namespace
|
|
91
367
|
@cache = cache
|
|
368
|
+
|
|
369
|
+
preload_cache if cache
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
def preload_cache
|
|
373
|
+
# NOTE: harvester is responsible for cache implementation
|
|
92
374
|
end
|
|
93
375
|
|
|
94
376
|
def data
|
|
95
377
|
@data = nil unless cache
|
|
96
378
|
|
|
97
|
-
@data ||= ::File.read(file)
|
|
379
|
+
@data ||= ::File.read(file, encoding: 'utf-8')
|
|
98
380
|
end
|
|
99
381
|
end
|
|
100
382
|
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Trifle
|
|
4
|
+
module Docs
|
|
5
|
+
module Helper
|
|
6
|
+
module AiDetection
|
|
7
|
+
AI_SCRAPER_PATTERNS = [
|
|
8
|
+
/GPTBot/i,
|
|
9
|
+
/ChatGPT/i,
|
|
10
|
+
/ClaudeBot/i,
|
|
11
|
+
/Claude-Web/i,
|
|
12
|
+
/anthropic/i,
|
|
13
|
+
/Perplexity/i,
|
|
14
|
+
/Google-Extended/i,
|
|
15
|
+
/CCBot/i,
|
|
16
|
+
/AI2Bot/i,
|
|
17
|
+
/FacebookBot/i
|
|
18
|
+
].freeze
|
|
19
|
+
|
|
20
|
+
module_function
|
|
21
|
+
|
|
22
|
+
def ai_scraper?(user_agent)
|
|
23
|
+
return false if user_agent.nil? || user_agent.empty?
|
|
24
|
+
|
|
25
|
+
AI_SCRAPER_PATTERNS.any? { |pattern| user_agent.match?(pattern) }
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Trifle
|
|
4
|
+
module Docs
|
|
5
|
+
module Helper
|
|
6
|
+
module MarkdownLayout
|
|
7
|
+
module_function
|
|
8
|
+
|
|
9
|
+
def render(meta:, raw_content:, sitemap:) # rubocop:disable Metrics/MethodLength
|
|
10
|
+
lines = []
|
|
11
|
+
title = meta['title'] || derive_title_from_url(meta['url'])
|
|
12
|
+
|
|
13
|
+
lines << "# #{title}"
|
|
14
|
+
lines << ''
|
|
15
|
+
lines << '## Navigation'
|
|
16
|
+
lines << navigation_toc(sitemap)
|
|
17
|
+
lines << ''
|
|
18
|
+
lines << '## Content'
|
|
19
|
+
lines << raw_content.to_s.strip
|
|
20
|
+
lines << ''
|
|
21
|
+
|
|
22
|
+
lines.join("\n")
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def navigation_toc(sitemap, depth: 0) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
26
|
+
return '' unless sitemap.is_a?(Hash)
|
|
27
|
+
|
|
28
|
+
sitemap.keys.reject { |k| k == '_meta' }.sort.map do |key|
|
|
29
|
+
node = sitemap[key]
|
|
30
|
+
meta = node['_meta'] || {}
|
|
31
|
+
title = meta['title'] || derive_title_from_url(meta['url'] || key)
|
|
32
|
+
url = meta['url'] || "/#{key}"
|
|
33
|
+
indent = ' ' * depth
|
|
34
|
+
children = node.reject { |child_key, _| child_key == '_meta' }
|
|
35
|
+
|
|
36
|
+
[
|
|
37
|
+
"#{indent}- [#{title}](#{url})",
|
|
38
|
+
navigation_toc(children, depth: depth + 1)
|
|
39
|
+
].reject(&:empty?).join("\n")
|
|
40
|
+
end.join("\n")
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def derive_title_from_url(url)
|
|
44
|
+
return 'Untitled' if url.nil? || url.empty?
|
|
45
|
+
|
|
46
|
+
url.split('/').last.to_s.gsub(/[-_]/, ' ').split.map(&:capitalize).join(' ')
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Trifle
|
|
4
|
+
module Docs
|
|
5
|
+
module Operations
|
|
6
|
+
class RawContent
|
|
7
|
+
attr_reader :url
|
|
8
|
+
|
|
9
|
+
def initialize(**keywords)
|
|
10
|
+
@url = keywords.fetch(:url)
|
|
11
|
+
@config = keywords[:config]
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def config
|
|
15
|
+
@config || Trifle::Docs.default
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def perform
|
|
19
|
+
config.harvester.raw_content_for(url: url)
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Trifle
|
|
4
|
+
module Docs
|
|
5
|
+
module Operations
|
|
6
|
+
class Search
|
|
7
|
+
attr_reader :query, :scope
|
|
8
|
+
|
|
9
|
+
def initialize(**keywords)
|
|
10
|
+
@query = keywords.fetch(:query)
|
|
11
|
+
@scope = keywords[:scope] # Optional scope to limit search to subfolder
|
|
12
|
+
@config = keywords[:config]
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def config
|
|
16
|
+
@config || Trifle::Docs.default
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def perform
|
|
20
|
+
config.harvester.search_for(query: query, scope: scope)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
data/lib/trifle/docs/version.rb
CHANGED
data/lib/trifle/docs.rb
CHANGED
|
@@ -2,10 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative 'docs/configuration'
|
|
4
4
|
require_relative 'docs/helper/tree'
|
|
5
|
+
require_relative 'docs/helper/markdown_layout'
|
|
6
|
+
require_relative 'docs/helper/ai_detection'
|
|
5
7
|
require_relative 'docs/harvester'
|
|
6
8
|
require_relative 'docs/harvester/file'
|
|
7
9
|
require_relative 'docs/harvester/markdown'
|
|
10
|
+
require_relative 'docs/operations/search'
|
|
8
11
|
require_relative 'docs/operations/content'
|
|
12
|
+
require_relative 'docs/operations/raw_content'
|
|
9
13
|
require_relative 'docs/operations/collection'
|
|
10
14
|
require_relative 'docs/operations/meta'
|
|
11
15
|
require_relative 'docs/operations/sitemap'
|
|
@@ -28,6 +32,12 @@ module Trifle
|
|
|
28
32
|
default
|
|
29
33
|
end
|
|
30
34
|
|
|
35
|
+
def self.search(query:, scope: nil, config: nil)
|
|
36
|
+
Trifle::Docs::Operations::Search.new(
|
|
37
|
+
query: query, scope: scope, config: config
|
|
38
|
+
).perform
|
|
39
|
+
end
|
|
40
|
+
|
|
31
41
|
def self.content(url:, config: nil)
|
|
32
42
|
Trifle::Docs::Operations::Content.new(
|
|
33
43
|
url: url, config: config
|
|
@@ -40,6 +50,12 @@ module Trifle
|
|
|
40
50
|
).perform
|
|
41
51
|
end
|
|
42
52
|
|
|
53
|
+
def self.raw_content(url:, config: nil)
|
|
54
|
+
Trifle::Docs::Operations::RawContent.new(
|
|
55
|
+
url: url, config: config
|
|
56
|
+
).perform
|
|
57
|
+
end
|
|
58
|
+
|
|
43
59
|
def self.collection(url:, config: nil)
|
|
44
60
|
Trifle::Docs::Operations::Collection.new(
|
|
45
61
|
url: url, config: config
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: trifle-docs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jozef Vaclavik
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2025-12-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -200,10 +200,14 @@ files:
|
|
|
200
200
|
- lib/trifle/docs/harvester.rb
|
|
201
201
|
- lib/trifle/docs/harvester/file.rb
|
|
202
202
|
- lib/trifle/docs/harvester/markdown.rb
|
|
203
|
+
- lib/trifle/docs/helper/ai_detection.rb
|
|
204
|
+
- lib/trifle/docs/helper/markdown_layout.rb
|
|
203
205
|
- lib/trifle/docs/helper/tree.rb
|
|
204
206
|
- lib/trifle/docs/operations/collection.rb
|
|
205
207
|
- lib/trifle/docs/operations/content.rb
|
|
206
208
|
- lib/trifle/docs/operations/meta.rb
|
|
209
|
+
- lib/trifle/docs/operations/raw_content.rb
|
|
210
|
+
- lib/trifle/docs/operations/search.rb
|
|
207
211
|
- lib/trifle/docs/operations/sitemap.rb
|
|
208
212
|
- lib/trifle/docs/version.rb
|
|
209
213
|
- sig/trifle/docs.rbs
|