mddir 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +12 -0
- data/LICENSE.txt +21 -0
- data/README.md +160 -0
- data/exe/mddir +6 -0
- data/lib/mddir/cli.rb +230 -0
- data/lib/mddir/collection.rb +113 -0
- data/lib/mddir/config.rb +55 -0
- data/lib/mddir/entry.rb +77 -0
- data/lib/mddir/fetcher.rb +213 -0
- data/lib/mddir/global_index.rb +51 -0
- data/lib/mddir/search.rb +73 -0
- data/lib/mddir/search_index.rb +107 -0
- data/lib/mddir/server.rb +152 -0
- data/lib/mddir/utils.rb +46 -0
- data/lib/mddir/version.rb +5 -0
- data/lib/mddir.rb +16 -0
- data/public/style.css +435 -0
- data/views/collection.erb +43 -0
- data/views/home.erb +21 -0
- data/views/layout.erb +44 -0
- data/views/reader.erb +24 -0
- data/views/search.erb +30 -0
- metadata +282 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
require "http-cookie"
require "httpx"
require "nokogiri"
require "readability"
require "reverse_markdown"
require "yaml"
|
|
9
|
+
|
|
10
|
+
module Mddir
|
|
11
|
+
class Fetcher # rubocop:disable Metrics/ClassLength
|
|
12
|
+
CONNECT_TIMEOUT = 15
|
|
13
|
+
READ_TIMEOUT = 30
|
|
14
|
+
|
|
15
|
+
READABILITY_TAGS = %w[
|
|
16
|
+
div p span
|
|
17
|
+
h1 h2 h3 h4 h5 h6
|
|
18
|
+
pre code
|
|
19
|
+
ul ol li
|
|
20
|
+
table thead tbody tfoot tr th td
|
|
21
|
+
blockquote
|
|
22
|
+
a img br hr
|
|
23
|
+
strong em b i u s del sub sup
|
|
24
|
+
dl dt dd
|
|
25
|
+
figure figcaption
|
|
26
|
+
details summary
|
|
27
|
+
].freeze
|
|
28
|
+
|
|
29
|
+
READABILITY_ATTRIBUTES = %w[href src alt title lang class id style].freeze
|
|
30
|
+
|
|
31
|
+
def initialize(config, cookies_path: nil)
|
|
32
|
+
@config = config
|
|
33
|
+
@cookie_jar = load_cookies(cookies_path)
|
|
34
|
+
@client = build_client
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Downloads +url+ and turns the response into an Entry.
#
# Responses whose Content-Type contains "text/markdown" are handled by
# process_markdown_response; everything else goes through the HTML pipeline.
#
# @param url [String] address to fetch
# @return [Entry]
# @raise [FetchError] propagated from #request
def fetch(url)
  response = request(url)
  markdown = response.headers["content-type"].to_s.include?("text/markdown")
  return process_markdown_response(url, response) if markdown

  process_html_response(url, response)
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
# Loads a cookies.txt-format file into a cookie jar.
#
# @param path [String, nil] location of the cookies file
# @return [HTTP::CookieJar, nil] nil when no path is given or the file is missing
def load_cookies(path)
  if path && File.exist?(path)
    HTTP::CookieJar.new.tap do |jar|
      # session: true is passed so the jar also loads session cookies.
      jar.load(path, format: :cookiestxt, session: true)
    end
  end
end
|
|
57
|
+
|
|
58
|
+
# Builds the HTTPX session used for every request: redirects are followed,
# markdown is listed before HTML in the Accept header, and the configured
# user agent and timeouts are applied.
#
# @return [Object] the configured HTTPX session
def build_client
  default_headers = {
    "accept" => "text/markdown, text/html",
    "user-agent" => @config.user_agent
  }
  timeouts = { connect_timeout: CONNECT_TIMEOUT, read_timeout: READ_TIMEOUT }

  HTTPX.plugin(:follow_redirects).with(headers: default_headers, timeout: timeouts)
end
|
|
68
|
+
|
|
69
|
+
# Performs the GET request for +url+, attaching any matching cookies.
#
# @param url [String] address to fetch
# @return [Object] the successful HTTPX response
# @raise [FetchError] on transport failures (HTTPX::ErrorResponse) and on
#   HTTP 4xx/5xx statuses
def request(url)
  response = @client.get(url, headers: cookie_headers(url))
  raise FetchError, response.error.message if response.is_a?(HTTPX::ErrorResponse)

  # A 4xx/5xx answer is a regular response object, not an ErrorResponse.
  # Without this check an error page would be converted and stored as if it
  # were the requested document.
  raise FetchError, "HTTP #{response.status} for #{url}" if response.status >= 400

  response
end
|
|
76
|
+
|
|
77
|
+
# Builds the per-request Cookie header from the loaded cookie jar.
#
# @param url [String] request URL used to select applicable cookies
# @return [Hash] +{ "cookie" => value }+, or an empty hash when there is no
#   jar or no cookie applies
def cookie_headers(url)
  jar = @cookie_jar
  return {} unless jar

  applicable = jar.cookies(URI.parse(url))
  header_value = HTTP::Cookie.cookie_value(applicable)
  return {} if header_value.empty?

  { "cookie" => header_value }
end
|
|
84
|
+
|
|
85
|
+
# Returns a UTF-8 copy of +body+, interpreting its bytes according to the
# charset declared in +content_type+.
#
# The charset value may be quoted (e.g. charset="utf-8"); the quotes are not
# part of the encoding name. A missing or unrecognised charset falls back to
# UTF-8 instead of raising. Invalid or unconvertible bytes are replaced.
#
# @param body [String] raw response body (not mutated)
# @param content_type [String, nil] Content-Type header value
# @return [String] UTF-8 encoded text
def normalize_encoding(body, content_type)
  declared = content_type.to_s[/charset=["']?([^\s;"']+)/i, 1]

  encoding =
    begin
      declared ? Encoding.find(declared) : Encoding::UTF_8
    rescue ArgumentError
      # Unknown encoding name sent by the server.
      Encoding::UTF_8
    end

  body.dup.force_encoding(encoding).encode("UTF-8", invalid: :replace, undef: :replace)
end
|
|
91
|
+
|
|
92
|
+
# Builds an Entry from a response that already contains markdown.
#
# Title and description come from the YAML frontmatter; the stored markdown
# is the full normalised body, frontmatter included. The token count is taken
# from the "x-markdown-tokens" header when present, otherwise estimated.
#
# @param url [String] the fetched URL
# @param response [Object] HTTPX response
# @return [Entry]
def process_markdown_response(url, response)
  response_headers = response.headers
  body = normalize_encoding(response.body.to_s, response_headers["content-type"])
  meta, content = parse_frontmatter(body)
  token_count, token_estimated = resolve_token_count(content, response_headers["x-markdown-tokens"])
  title = meta["title"].to_s
  description = meta["description"].to_s

  Entry.new(
    url:,
    title:,
    description:,
    markdown: body,
    conversion: "cloudflare",
    token_count:,
    token_estimated:
  )
end
|
|
107
|
+
|
|
108
|
+
# Splits a markdown document into its YAML frontmatter and the remaining
# content.
#
# Frontmatter is recognised only when the first line is exactly "---" and a
# later line is exactly "---"; a "---" inside a value (for example a URL such
# as ".../a---b") no longer terminates the block. Dates are permitted in
# addition to Time, because unquoted YAML dates are otherwise rejected. When
# the block is missing, malformed, or does not parse to a Hash, the document
# is treated as having no frontmatter.
#
# @param body [String] full markdown document
# @return [Array(Hash, String)] frontmatter hash and content without it
def parse_frontmatter(body)
  lines = body.lines
  return [{}, body] unless lines.first&.strip == "---"

  closing = lines.drop(1).index { |line| line.strip == "---" }
  return [{}, body] unless closing

  frontmatter = YAML.safe_load(lines[1, closing].join, permitted_classes: [Time, Date]) || {}
  return [{}, body] unless frontmatter.is_a?(Hash)

  [frontmatter, lines.drop(closing + 2).join.lstrip]
rescue Psych::Exception
  # Syntax errors, disallowed classes and bad aliases all mean "not usable
  # frontmatter"; keep the whole body rather than aborting the fetch.
  [{}, body]
end
|
|
119
|
+
|
|
120
|
+
# Determines the token count for a markdown document.
#
# @param content [String] markdown content without frontmatter
# @param header [String, nil] value of the "x-markdown-tokens" header
# @return [Array(Integer, Boolean)] count and whether it is an estimate; the
#   estimate is one token per four characters, rounded up
def resolve_token_count(content, header)
  return [header.to_i, false] if header

  estimate = (content.length / 4.0).ceil
  [estimate, true]
end
|
|
127
|
+
|
|
128
|
+
# Builds an Entry from an HTML response: the readable article is extracted,
# converted to markdown, and the token count is estimated at one token per
# four characters.
#
# @param url [String] the fetched URL
# @param response [Object] HTTPX response
# @return [Entry]
def process_html_response(url, response)
  html = normalize_encoding(response.body.to_s, response.headers["content-type"])
  document = Nokogiri::HTML(html)
  title, article_html = extract_readable_content(html, document)
  markdown = html_to_markdown(article_html)
  description = extract_description(document)
  token_count = (markdown.length / 4.0).ceil

  Entry.new(
    url:,
    title:,
    description:,
    markdown:,
    conversion: "local",
    token_count:,
    token_estimated: true
  )
end
|
|
144
|
+
|
|
145
|
+
# Extracts the article title and HTML from a page.
#
# Falls back to the whole <body> (with a warning on stderr) when readability
# yields nothing, and to the <title> tag when readability gives no title.
#
# @param html [String] full page HTML
# @param document [Nokogiri::HTML::Document] parsed form of the same HTML
# @return [Array(String, String)] cleaned title and article HTML
def extract_readable_content(html, document)
  title, article_html = run_readability(html)

  unless article_html && !article_html.strip.empty?
    warn "Warning: readability extracted no content, falling back to full body"
    article_html = document.at("body")&.inner_html.to_s
  end

  title = extract_title(document) if title.empty?

  [clean_title(title), article_html]
end
|
|
157
|
+
|
|
158
|
+
# Runs the readability extraction on +html+.
#
# @param html [String] full page HTML
# @return [Array(String, String)] title and article HTML; ["", nil] when the
#   extraction raises any StandardError (best effort by design)
def run_readability(html)
  begin
    readable = Readability::Document.new(html, tags: READABILITY_TAGS, attributes: READABILITY_ATTRIBUTES)
    title = readable.title.to_s
    [title, readable.content]
  rescue StandardError
    ["", nil]
  end
end
|
|
164
|
+
|
|
165
|
+
# Converts article HTML to GitHub-flavoured markdown and re-attaches code
# block languages detected in the HTML.
#
# @param article_html [String] extracted article HTML
# @return [String] markdown text
def html_to_markdown(article_html)
  utf8_html = article_html.encode("UTF-8", invalid: :replace, undef: :replace)
  languages = extract_code_languages(utf8_html)
  converted = ReverseMarkdown.convert(utf8_html, github_flavored: true)

  inject_code_languages(converted.force_encoding("UTF-8"), languages)
end
|
|
171
|
+
|
|
172
|
+
# Collects the declared language of every <pre> element, in document order.
#
# For each <pre> the first available of these is used: its "lang" attribute,
# its "data-lang" attribute, or a "language-xxx" class on its first <code>.
#
# @param html [String] article HTML
# @return [Array<String, nil>] one element per <pre>; nil when none is declared
def extract_code_languages(html)
  Nokogiri::HTML.fragment(html).css("pre").map do |pre|
    declared = pre["lang"] || pre["data-lang"]
    next declared if declared

    code_class = pre.css("code").first&.[]("class")
    code_class&.match(/language-(\w+)/)&.captures&.first
  end
end
|
|
181
|
+
|
|
182
|
+
# Adds language identifiers to the opening fences of fenced code blocks.
#
# Every fence line is visited and open/close state is tracked, so the n-th
# code block always pairs with languages[n]. A block whose opening fence
# already carries a language keeps it but still consumes its slot, so later
# blocks stay aligned and closing fences are never modified.
#
# @param markdown [String] markdown containing ``` fences
# @param languages [Array<String, nil>] language per code block, in order
# @return [String] markdown with languages injected where known
def inject_code_languages(markdown, languages)
  block_index = 0
  inside_block = false

  # [^\n`]* keeps the match on a single line and skips inline ```code``` spans.
  markdown.gsub(/^```[^\n`]*$/) do |fence|
    if inside_block
      inside_block = false
      next fence
    end

    inside_block = true
    language = languages[block_index]
    block_index += 1

    language && fence.strip == "```" ? "```#{language}" : fence
  end
end
|
|
196
|
+
|
|
197
|
+
# Reads the page description from <meta name="description">.
#
# @param document [Nokogiri::HTML::Document] parsed page
# @return [String] the meta content, or "" when the tag is absent
def extract_description(document)
  node = document.at('meta[name="description"]')
  return "" unless node

  node["content"].to_s
end
|
|
201
|
+
|
|
202
|
+
# Reads the page title from the <title> tag.
#
# @param document [Nokogiri::HTML::Document] parsed page
# @return [String] stripped title text, or "" when there is no <title>
def extract_title(document)
  node = document.at("title")
  return "" unless node

  node.text.to_s.strip
end
|
|
206
|
+
|
|
207
|
+
# Removes a trailing site-name suffix such as " - Example Docs" or " | Blog".
#
# Only the last separator is considered. A pipe counts with or without
# surrounding spaces; hyphens and dashes count only when surrounded by
# whitespace, so hyphenated words ("Self-hosting") and ranges ("2020–2021")
# stay intact. The original (stripped) title is returned when stripping would
# leave nothing.
#
# @param title [String] raw page title
# @return [String] title without the trailing suffix
def clean_title(title)
  stripped = title.strip
  head, separator, tail = title.rpartition(/\s*\|\s*|\s+[–—-]\s+/)
  return stripped if separator.empty? || tail.strip.empty?

  cleaned = head.strip
  cleaned.empty? ? stripped : cleaned
end
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# Raised by Fetcher when a request fails at the HTTP client level.
class FetchError < StandardError
end
|
|
213
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "yaml"
|
|
5
|
+
|
|
6
|
+
module Mddir
|
|
7
|
+
# Maintains <base_dir>/index.yml, a summary of every collection (entry count
# and last-added value) plus overall totals.
module GlobalIndex
  # @param config [Object] configuration responding to +base_dir+
  # @return [String] absolute or relative path of the global index file
  def self.path(config)
    File.join(config.base_dir, "index.yml")
  end

  # Reads the global index, rebuilding it when the file is missing, does not
  # contain a Hash, or cannot be parsed safely.
  #
  # @param config [Object] configuration responding to +base_dir+
  # @return [Hash] index data
  def self.load(config)
    file = path(config)
    return update!(config) unless File.exist?(file)

    data = YAML.safe_load_file(file, permitted_classes: [Time])
    data.is_a?(Hash) ? data : update!(config)
  rescue Psych::Exception
    # Covers syntax errors as well as disallowed classes and bad aliases;
    # the index is derived data, so regenerate it instead of failing.
    update!(config)
  end

  # Rebuilds the global index from the collections on disk and writes it.
  #
  # @param config [Object] configuration responding to +base_dir+
  # @return [Hash] the freshly written index data
  def self.update!(config)
    FileUtils.mkdir_p(config.base_dir)

    collections = build_collections(config)
    data = {
      "collections" => collections,
      "total_entries" => collections.sum { |_, info| info["entry_count"].to_i },
      # Same text Time#iso8601 produces for a UTC time, without depending on
      # the "time" library having been required elsewhere.
      "last_updated" => Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
    }

    File.write(path(config), YAML.dump(data))
    data
  end

  # Summarises every collection as name => { "entry_count", "last_added" }.
  def self.build_collections(config)
    Collection.all(config).to_h do |collection|
      summary = {
        "entry_count" => collection.entry_count,
        "last_added" => collection.last_added&.to_s
      }
      [collection.name, summary]
    end
  end

  private_class_method :build_collections
end
|
|
51
|
+
end
|
data/lib/mddir/search.rb
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mddir
|
|
4
|
+
# Searches one collection, or all of them, through SearchIndex and groups the
# matching lines by entry.
class Search
  # A matching entry together with all of its matching lines.
  Result = Struct.new(:collection_name, :entry, :matches)
  # One matching line: its line number and a display snippet.
  Match = Struct.new(:line_number, :snippet)

  # @param config [Object] configuration handed to Collection and SearchIndex
  def initialize(config)
    @config = config
  end

  # Runs +query+ against the search index.
  #
  # @param query [String] text to look for
  # @param collection_name [String, nil] restrict to one collection when given
  # @return [Array<Result>] one result per entry; empty when no collection applies
  def search(query, collection_name: nil)
    collections = resolve_collections(collection_name)
    return [] if collections.empty?

    SearchIndex.open(@config) do |index|
      collections.each { |collection| index.ensure_current!(collection) }

      rows = index.query(query, collection_names: collections.map(&:name))
      build_results(collections, rows, query)
    end
  end

  private

  # Returns the collections to search: the named one if it exists, else all.
  def resolve_collections(collection_name)
    return Collection.all(@config) unless collection_name

    collection = Collection.new(collection_name, @config)
    collection.exist? ? [collection] : []
  end

  # Groups index rows by (collection, filename) and pairs each group with its
  # entry; rows whose entry is no longer listed are dropped.
  def build_results(collections, rows, query)
    entries_lookup = build_entries_lookup(collections)
    grouped = rows.group_by { |row| [row["collection"], row["filename"]] }

    grouped.filter_map do |(collection_name, filename), file_rows|
      entry = entries_lookup.dig(collection_name, filename)
      next unless entry

      # Positional construction: these structs are not declared with
      # keyword_init, and keyword arguments are only mapped to members on
      # Ruby 3.2+. Positional arguments behave the same on every version.
      Result.new(collection_name, entry, build_matches(file_rows, query))
    end
  end

  # Builds collection name => { filename => entry } for fast lookup.
  def build_entries_lookup(collections)
    collections.to_h do |collection|
      by_filename = collection.entries.to_h { |entry| [entry["filename"], entry] }
      [collection.name, by_filename]
    end
  end

  # Converts index rows for one file into Match structs.
  def build_matches(file_rows, query)
    file_rows.map do |row|
      Match.new(row["line_number"].to_i, extract_snippet(row["content"], query))
    end
  end

  # Cuts a window around the first case-insensitive occurrence of +query+:
  # up to 40 characters before and 80 after, with "..." marking cut ends.
  # Without an occurrence the first 120 characters are returned.
  def extract_snippet(line, query) # rubocop:disable Metrics/AbcSize
    line = line.strip
    index = line.downcase.index(query.downcase)
    return line[0, 120] unless index

    start = [index - 40, 0].max
    finish = [index + query.length + 80, line.length].min
    snippet = line[start...finish]
    snippet = "...#{snippet}" if start.positive?
    snippet = "#{snippet}..." if finish < line.length
    snippet
  end
end
|
|
73
|
+
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "sqlite3"
|
|
4
|
+
|
|
5
|
+
module Mddir
|
|
6
|
+
class SearchIndex
|
|
7
|
+
# Opens the search index, yields it, and always closes it afterwards.
#
# @param config [Object] configuration passed to the constructor
# @yieldparam index [SearchIndex] the open index
# @return [Object] the block's return value
def self.open(config)
  index = new(config)
  begin
    yield index
  ensure
    index.close
  end
end
|
|
13
|
+
|
|
14
|
+
# Opens (or creates) <base_dir>/search.db and makes sure the schema exists.
#
# @param config [Object] configuration responding to +base_dir+
def initialize(config)
  database_path = File.join(config.base_dir, "search.db")
  @db = SQLite3::Database.new(database_path)
  # Rows come back as hashes keyed by column name.
  @db.results_as_hash = true
  setup_schema
end
|
|
19
|
+
|
|
20
|
+
# Reindexes +collection+ unless the recorded indexing timestamp is at least
# as new as the collection's index file.
#
# @param collection [Collection] collection to check
def ensure_current!(collection)
  meta = @db.get_first_row("SELECT indexed_at FROM meta WHERE collection = ?", collection.name)
  up_to_date = meta && meta["indexed_at"] >= index_mtime(collection)

  reindex(collection) unless up_to_date
end
|
|
26
|
+
|
|
27
|
+
# Finds lines containing +text+ within the given collections.
#
# Queries of three or more characters use the FTS5 trigram index as a quoted
# phrase, ordered by rank. The trigram tokenizer cannot match anything shorter
# than three characters, so shorter queries fall back to a LIKE scan (with
# the LIKE wildcards escaped) ordered by insertion order.
#
# @param text [String] text to search for
# @param collection_names [Array<String>] collections to restrict the search to
# @return [Array<Hash>] rows with collection, filename, line_number, content
def query(text, collection_names:)
  placeholders = (["?"] * collection_names.size).join(", ")
  select = "SELECT collection, filename, line_number, content FROM search_lines"
  scope = "collection IN (#{placeholders})"

  if text.length < 3
    pattern = "%#{text.gsub(/[\\%_]/) { |char| "\\#{char}" }}%"
    return @db.execute(
      "#{select} WHERE content LIKE ? ESCAPE '\\' AND #{scope} ORDER BY rowid",
      [pattern, *collection_names]
    )
  end

  # Double quotes are doubled so the whole text is one FTS5 string.
  phrase = "\"#{text.gsub('"', '""')}\""
  @db.execute(
    "#{select} WHERE search_lines MATCH ? AND #{scope} ORDER BY rank",
    [phrase, *collection_names]
  )
end
|
|
37
|
+
|
|
38
|
+
# Deletes every indexed line and the meta row of a collection.
#
# @param collection_name [String] name of the collection to forget
def remove_collection!(collection_name)
  statements = [
    "DELETE FROM search_lines WHERE collection = ?",
    "DELETE FROM meta WHERE collection = ?"
  ]

  statements.map { |sql| @db.execute(sql, collection_name) }.last
end
|
|
42
|
+
|
|
43
|
+
# Closes the underlying SQLite database handle.
def close = @db.close
|
|
46
|
+
|
|
47
|
+
private
|
|
48
|
+
|
|
49
|
+
# Creates the tables on first use: an FTS5 table holding one row per content
# line (case-insensitive trigram tokenizer) and a meta table recording when
# each collection was indexed.
def setup_schema
  schema = <<~SQL
    CREATE VIRTUAL TABLE IF NOT EXISTS search_lines USING fts5(
      collection UNINDEXED,
      filename UNINDEXED,
      line_number UNINDEXED,
      content,
      tokenize='trigram case_sensitive 0'
    );

    CREATE TABLE IF NOT EXISTS meta (
      collection TEXT PRIMARY KEY,
      indexed_at REAL NOT NULL
    );
  SQL

  @db.execute_batch(schema)
end
|
|
65
|
+
|
|
66
|
+
# Replaces all indexed lines of +collection+ and records the new timestamp,
# inside a single transaction.
#
# @param collection [Collection] collection to rebuild
def reindex(collection)
  name = collection.name

  @db.transaction do
    @db.execute("DELETE FROM search_lines WHERE collection = ?", name)
    index_collection_files(collection)
    update_meta(collection)
  end
end
|
|
73
|
+
|
|
74
|
+
# Indexes the file of every entry in +collection+, skipping entries whose
# file no longer exists on disk.
#
# @param collection [Collection] collection whose entries are indexed
def index_collection_files(collection)
  collection.entries.each do |entry|
    filename = entry["filename"]
    file_path = File.join(collection.path, filename)

    index_file(collection.name, filename, file_path) if File.exist?(file_path)
  end
end
|
|
82
|
+
|
|
83
|
+
# Stores the collection's current index-file mtime as its indexing timestamp.
#
# @param collection [Collection] collection that was just indexed
def update_meta(collection)
  sql = "INSERT OR REPLACE INTO meta (collection, indexed_at) VALUES (?, ?)"
  values = [collection.name, index_mtime(collection)]

  @db.execute(sql, values)
end
|
|
89
|
+
|
|
90
|
+
# Inserts every non-blank content line of a file (frontmatter excluded) into
# the full-text table together with its line number.
#
# @param collection_name [String] owning collection
# @param filename [String] entry filename stored with each row
# @param file_path [String] path of the file to read
def index_file(collection_name, filename, file_path)
  insert_sql = "INSERT INTO search_lines (collection, filename, line_number, content) VALUES (?, ?, ?, ?)"
  numbered_lines = Utils.skip_frontmatter(File.readlines(file_path, encoding: "UTF-8"))

  numbered_lines.each do |line_number, line|
    @db.execute(insert_sql, [collection_name, filename, line_number, line]) unless line.strip.empty?
  end
end
|
|
102
|
+
|
|
103
|
+
# Modification time of the collection's index file as a float.
#
# @param collection [Collection] collection responding to +index_path+
# @return [Float] mtime in seconds, or 0.0 when the file does not exist
def index_mtime(collection)
  index_path = collection.index_path
  return 0.0 unless File.exist?(index_path)

  File.mtime(index_path).to_f
end
|
|
106
|
+
end
|
|
107
|
+
end
|
data/lib/mddir/server.rb
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "sinatra/base"
|
|
4
|
+
require "kramdown"
|
|
5
|
+
require "kramdown-parser-gfm"
|
|
6
|
+
require "rouge"
|
|
7
|
+
require "uri"
|
|
8
|
+
|
|
9
|
+
module Mddir
|
|
10
|
+
class Server < Sinatra::Base # rubocop:disable Metrics/ClassLength
|
|
11
|
+
# Templates and static assets live two directories above this file.
gem_root = File.expand_path("../..", __dir__)
set :views, File.join(gem_root, "views")
set :public_folder, File.join(gem_root, "public")

# Lets forms tunnel DELETE requests through POST.
enable :method_override

# Every request gets the list of collection names.
before { @collection_names = Collection.all(config).map(&:name) }
|
|
19
|
+
|
|
20
|
+
# Stores the configuration in the app settings, binds to localhost on the
# configured port, prints a banner and runs the server.
#
# @param config [Object] configuration responding to +port+
def self.start(config)
  port = config.port

  set :mddir_config, config
  set :port, port
  set :bind, "localhost"

  puts "mddir server running at http://localhost:#{port}"
  puts "Press Ctrl+C to stop"
  run!
end
|
|
29
|
+
|
|
30
|
+
helpers do # rubocop:disable Metrics/BlockLength
|
|
31
|
+
# The configuration object stored by Server.start.
def config = settings.mddir_config
|
|
34
|
+
|
|
35
|
+
# Formats a date-like value as e.g. "Jan 05, 2024".
#
# @param date_str [#to_s, nil] value to format
# @return [String] formatted date; "" for nil/false; the value's own string
#   form when it cannot be parsed
def format_date(date_str)
  return "" unless date_str

  text = date_str.to_s
  begin
    Time.parse(text).strftime("%b %d, %Y")
  rescue ArgumentError
    text
  end
end
|
|
42
|
+
|
|
43
|
+
# Extracts the host part of a URL.
#
# @param url [String] URL to inspect
# @return [String, nil] the host, or the input unchanged when it is not a
#   valid URI
def domain_from_url(url)
  begin
    URI.parse(url).host
  rescue URI::InvalidURIError
    url
  end
end
|
|
48
|
+
|
|
49
|
+
# Shortens +text+ to at most +length+ characters, appending "..." when cut.
#
# @param text [String, nil] text to shorten
# @param length [Integer] maximum number of characters kept
# @return [String] shortened text; "" for nil
def truncate(text, length = 200)
  return "" unless text
  return text if text.length <= length

  "#{text[0, length]}..."
end
|
|
54
|
+
|
|
55
|
+
# HTML-escapes the string form of +text+ for safe output in templates.
#
# @param text [#to_s] value to escape
# @return [String] escaped text
def h(text)
  string = text.to_s
  Rack::Utils.escape_html(string)
end
|
|
58
|
+
|
|
59
|
+
# Formats a token count for display, e.g. "~950 tokens" or "~1.5k tokens".
#
# @param count [Numeric, nil] number of tokens
# @return [String] display text; "" for nil
def format_tokens(count)
  return "" unless count
  return "~#{count} tokens" unless count >= 1000

  thousands = (count / 1000.0).round(1)
  "~#{thousands}k tokens"
end
|
|
64
|
+
|
|
65
|
+
# Escapes +text+ for HTML and wraps every case-insensitive occurrence of
# +query+ in <mark> tags.
#
# Matching is done on the raw text and each piece is escaped afterwards.
# Matching against already-escaped text would let a query such as "amp"
# split an "&amp;" entity, and queries containing "<" or "&" would never
# match their escaped form.
#
# @param text [#to_s] text to display
# @param query [String, nil] search text to highlight
# @return [String] HTML-safe text with <mark> highlights
def highlight(text, query)
  return h(text) unless query && !query.empty?

  # The capture group makes split keep the matched text at odd positions.
  pattern = /(#{Regexp.escape(query)})/i
  pieces = text.to_s.split(pattern, -1).each_with_index.map do |piece, position|
    position.odd? ? "<mark>#{h(piece)}</mark>" : h(piece)
  end

  pieces.join
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Home page: lists all collections from the global index, sorted by name.
get "/" do
  @global = GlobalIndex.load(config)
  collections = @global["collections"] || {}
  @collections = collections.sort_by(&:first)

  erb :home
end
|
|
79
|
+
|
|
80
|
+
# Search page: runs the query ("q"), optionally limited to one collection
# ("collection"). A blank query renders the page with no results.
get "/search" do
  @query = params["q"].to_s.strip
  @collection_filter = params["collection"]
  @results = @query.empty? ? [] : Search.new(config).search(@query, collection_name: @collection_filter)

  erb :search
end
|
|
93
|
+
|
|
94
|
+
# Collection page: lists the collection's entries in reverse stored order.
# Responds 404 when the collection does not exist.
get "/:collection" do
  found = Collection.new(params[:collection], config)
  halt(404, "Collection not found") unless found.exist?

  @collection = found
  @current_collection = found.name
  @entries = found.entries.reverse

  erb :collection
end
|
|
104
|
+
|
|
105
|
+
# Reader page: renders one entry's markdown (frontmatter removed) as HTML
# with Rouge syntax highlighting. Responds 404 when the collection, the
# entry, or the entry's file is missing.
get "/:collection/:slug" do
  collection = Collection.new(params[:collection], config)
  halt 404, "Collection not found" unless collection.exist?

  @collection = collection
  @current_collection = collection.name

  slug = params[:slug]
  @entry = collection.entries.find { |candidate| candidate["slug"] == slug }
  halt 404, "Entry not found" unless @entry

  file_path = File.join(collection.path, @entry["filename"])
  halt 404, "File not found" unless File.exist?(file_path)

  markdown = Utils.strip_frontmatter(File.read(file_path, encoding: "UTF-8"))
  kramdown_options = {
    input: "GFM",
    syntax_highlighter: :rouge,
    syntax_highlighter_opts: { default_lang: "plaintext" }
  }
  # NOTE(review): the markdown originates from fetched pages and is converted
  # to HTML without sanitising here — confirm the template or another layer
  # handles untrusted HTML.
  @html_content = Kramdown::Document.new(markdown, kramdown_options).to_html

  erb :reader
end
|
|
130
|
+
|
|
131
|
+
# Removes a whole collection, then redirects to the home page.
# Responds 404 when the collection does not exist.
delete "/:collection" do
  target = Collection.new(params[:collection], config)
  halt(404, "Collection not found") unless target.exist?

  target.remove!
  redirect "/"
end
|
|
139
|
+
|
|
140
|
+
# Removes a single entry, then redirects to its collection page.
# Responds 404 when the collection or the entry does not exist.
delete "/:collection/:slug" do
  collection = Collection.new(params[:collection], config)
  halt(404, "Collection not found") unless collection.exist?

  slug = params[:slug]
  entry = collection.entries.find { |candidate| candidate["slug"] == slug }
  halt(404, "Entry not found") unless entry

  collection.remove_entry(entry["slug"])
  redirect "/#{collection.name}"
end
|
|
151
|
+
end
|
|
152
|
+
end
|
data/lib/mddir/utils.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mddir
|
|
4
|
+
# Stateless text helpers shared across the gem.
module Utils
  # Converts text to a lowercase, hyphen-separated slug containing only
  # a-z and 0-9. Each run of other characters becomes a single hyphen and
  # leading/trailing hyphens are dropped.
  #
  # @param text [String] text to slugify
  # @return [String] slug (may be empty when no ASCII letters or digits remain)
  def self.slugify(text)
    text.downcase
        .gsub(/[^a-z0-9]+/, "-")
        .gsub(/\A-|-\z/, "")
  end

  # Removes a leading YAML frontmatter block from +text+.
  #
  # Uses the same line-based rule as skip_frontmatter: the block opens when
  # the first line is exactly "---" and ends at the next line that is exactly
  # "---". A "---" inside a value (for example a URL such as ".../a---b")
  # does not end the block.
  #
  # @param text [String] document text
  # @return [String] text after the frontmatter with leading whitespace
  #   removed, or +text+ unchanged when there is no complete frontmatter block
  def self.strip_frontmatter(text)
    lines = text.lines
    return text unless lines.first&.strip == "---"

    closing = lines.drop(1).index { |line| line.strip == "---" }
    return text unless closing

    # closing is relative to the second line; +2 skips both delimiters.
    lines.drop(closing + 2).join.lstrip
  end

  # Pairs every content line with its 1-based line number, leaving out the
  # frontmatter block and its delimiters.
  #
  # @param lines [Array<String>] all lines of a document
  # @return [Array<Array(Integer, String)>] [line_number, chomped line] pairs
  def self.skip_frontmatter(lines)
    in_frontmatter = false

    lines.each_with_index.filter_map do |line, index|
      delimiter = line.strip == "---"

      if index.zero? && delimiter
        in_frontmatter = true
        next
      end

      if in_frontmatter
        in_frontmatter = false if delimiter
        next
      end

      [index + 1, line.chomp]
    end
  end
end
|
|
46
|
+
end
|
data/lib/mddir.rb
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "mddir/version"
|
|
4
|
+
require_relative "mddir/utils"
|
|
5
|
+
require_relative "mddir/config"
|
|
6
|
+
require_relative "mddir/global_index"
|
|
7
|
+
require_relative "mddir/collection"
|
|
8
|
+
require_relative "mddir/entry"
|
|
9
|
+
require_relative "mddir/fetcher"
|
|
10
|
+
require_relative "mddir/search_index"
|
|
11
|
+
require_relative "mddir/search"
|
|
12
|
+
require_relative "mddir/cli"
|
|
13
|
+
|
|
14
|
+
# Top-level namespace of the mddir gem.
module Mddir
  # Base error class defined for the gem.
  Error = Class.new(StandardError)
end
|