mkwebook 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -1
- data/lib/mkwebook/app.rb +216 -18
- data/lib/mkwebook/cli.rb +18 -6
- data/lib/mkwebook/concerns.rb +0 -1
- data/lib/mkwebook/config.rb +18 -7
- data/lib/mkwebook/entry_types.txt +75 -0
- data/lib/mkwebook/ext/string.rb +7 -5
- data/lib/mkwebook/version.rb +1 -1
- data/lib/template/mkwebook.yml +41 -0
- data/mkwebook.gemspec +1 -0
- metadata +17 -3
- data/lib/mkwebook/concerns/global_data_definition.rb +0 -244
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7e29166ba302805e68e70779ef8de58870671aab0ae684d1cec2290f5a0b4bf
|
4
|
+
data.tar.gz: 5e530d48d11ce6c26ac5255b7b294b15b6f90bde7b4ecc4e36ee2bc0e0ea7d54
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b90f0fbd51ad20e65847ca7fde950fc40651c3639a24f28b73c52580547e19e9b93f8e8a60247e3d56046afd2cfb9d758a5903c569b871093c841ad2513a52b
|
7
|
+
data.tar.gz: f5f17d96c4700ddd423fffe592a702812049a21e65ee113cccf12ba5c38b3dbc8af9a1307711ec96c03e4757e12a68c586e15497d33ccbf99e036078962e7cca
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
mkwebook (0.1.
|
4
|
+
mkwebook (0.1.2)
|
5
5
|
activesupport (>= 6.1.5)
|
6
6
|
concurrent-ruby
|
7
7
|
ferrum (>= 0.13)
|
8
|
+
sqlite3 (~> 1.5.4)
|
8
9
|
thor (>= 1.2.1)
|
9
10
|
|
10
11
|
GEM
|
@@ -40,6 +41,7 @@ GEM
|
|
40
41
|
yard (~> 0.9.11)
|
41
42
|
public_suffix (5.0.0)
|
42
43
|
rake (12.3.3)
|
44
|
+
sqlite3 (1.5.4-x86_64-darwin)
|
43
45
|
thor (1.2.1)
|
44
46
|
tzinfo (2.0.5)
|
45
47
|
concurrent-ruby (~> 1.0)
|
data/lib/mkwebook/app.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require 'sqlite3'
|
2
3
|
require 'Mkwebook/config'
|
3
4
|
require 'ferrum'
|
4
5
|
require 'pry-byebug'
|
@@ -14,7 +15,9 @@ module Mkwebook
|
|
14
15
|
Dir.chdir(cli_options[:work_dir])
|
15
16
|
end
|
16
17
|
@cli_options = cli_options
|
17
|
-
@config = Mkwebook::Config.new(@cli_options
|
18
|
+
@config = Mkwebook::Config.new(@cli_options)
|
19
|
+
@downloaded_depth = 0
|
20
|
+
@downloaded_pages = []
|
18
21
|
end
|
19
22
|
|
20
23
|
def create_config
|
@@ -25,20 +28,42 @@ module Mkwebook
|
|
25
28
|
File.join(Mkwebook::GEM_ROOT, 'template', 'mkwebook.yml')
|
26
29
|
end
|
27
30
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
+
# Runs the complete download pipeline, one step after another:
# index page, extra pages, content pages, link rewriting and finally the
# post-process hook. Returns whatever the last step (post_process) returns.
def download
  steps = %i[download_index append_extra_pages download_pages modify_page_links post_process]
  steps.map { |step| send(step) }.last
end
|
32
38
|
|
33
39
|
# Starts a Ferrum browser with browser_options, opens a fresh browser
# context on it and, when the config declares authentication data, seeds
# that context through set_auth_info.
def prepare_browser
  @browser = Ferrum::Browser.new(browser_options)
  @browser_context = browser.contexts.create
  return unless @config.authentication?

  set_auth_info
end
|
37
44
|
|
38
|
-
def
|
45
|
+
# Seeds the current browser context with the credentials found in the
# +authentication+ section of the config.
#
# Opens +authentication.url+ in a temporary page, assigns the configured
# cookie string to +document.cookie+ and stores every +local_storage+
# key/value pair through +localStorage.setItem+.
#
# Values are passed to the page as call arguments rather than being spliced
# into the JavaScript source, so a quote or backslash inside a cookie or
# token can neither break the script nor inject code into it.
def set_auth_info
  auth = @config[:authentication]
  page = @browser_context.create_page
  page.go_to(auth[:url])

  cookies = auth[:cookies]
  page.execute('document.cookie = arguments[0]', cookies.to_s) if cookies

  auth[:local_storage]&.each do |key, value|
    # Both are stringified, exactly as the former string interpolation did.
    page.execute('localStorage.setItem(arguments[0], arguments[1])', key.to_s, value.to_s)
  end
ensure
  # The page is only a vehicle for the injection; do not leave it open.
  page&.close
end
|
58
|
+
|
59
|
+
def download_index(only_index = false)
|
39
60
|
prepare_browser
|
40
61
|
index_page = @browser_context.create_page
|
62
|
+
begin
|
41
63
|
index_page.go_to(@config[:index_page][:url])
|
64
|
+
rescue Ferrum::PendingConnectionsError => e
|
65
|
+
index_page.go_to(@config[:index_page][:url])
|
66
|
+
end
|
42
67
|
index_page.network.wait_for_idle(timeout: 10) rescue nil
|
43
68
|
modifier = @config[:index_page][:modifier]
|
44
69
|
if modifier && File.file?(modifier)
|
@@ -51,8 +76,15 @@ module Mkwebook
|
|
51
76
|
@page_urls = index_elements.flat_map do |element|
|
52
77
|
url = element.css(@config[:index_page][:link_selector]).map { |a| a.evaluate('this.href') }
|
53
78
|
element.css(@config[:index_page][:link_selector]).each do |a|
|
54
|
-
u = a.evaluate('this.href')
|
55
|
-
|
79
|
+
u = a.evaluate('this.href')
|
80
|
+
href = u.normalize_uri('.html').relative_path_from(@config[:index_page][:output])
|
81
|
+
file = @config[:index_page][:output]
|
82
|
+
a.evaluate <<~JS
|
83
|
+
(function(that) {
|
84
|
+
that.setAttribute('data-mkwebook-href', '#{href.gsub("'", "\\\\'")}')
|
85
|
+
that.setAttribute('data-mkwebook-file', '#{file.gsub("'", "\\\\'")}')
|
86
|
+
})(this);
|
87
|
+
JS
|
56
88
|
end
|
57
89
|
url
|
58
90
|
end.uniq
|
@@ -61,9 +93,6 @@ module Mkwebook
|
|
61
93
|
@config[:pages].any? { |page| url =~ Regexp.new(page[:url_pattern]) }
|
62
94
|
end
|
63
95
|
|
64
|
-
@page_urls = @page_urls[0, @cli_options[:limit]] if @cli_options[:limit]
|
65
|
-
|
66
|
-
|
67
96
|
@config[:index_page][:title].try do |title|
|
68
97
|
index_page.execute("document.title = '#{title}'")
|
69
98
|
end
|
@@ -82,17 +111,25 @@ module Mkwebook
|
|
82
111
|
end.join("\n").tap do |html|
|
83
112
|
File.write(@config[:index_page][:output], html)
|
84
113
|
end
|
114
|
+
@downloaded_pages << {file: @config[:index_page][:output], url: @config[:index_page][:url]}
|
115
|
+
modify_page_links if only_index
|
85
116
|
rescue Ferrum::Error => e
|
86
117
|
binding.pry
|
87
118
|
end
|
88
119
|
|
89
|
-
def
|
120
|
+
def download_pages
|
121
|
+
return unless @downloaded_depth < @config[:max_recursion]
|
90
122
|
|
91
123
|
pool = Concurrent::FixedThreadPool.new(@config[:concurrency])
|
92
124
|
|
125
|
+
@page_urls = @page_urls[0, @cli_options[:limit]] if @cli_options[:limit]
|
126
|
+
|
127
|
+
@page_links = @page_urls.map { |url| [url, []] }.to_h
|
128
|
+
|
93
129
|
@page_urls.each do |url|
|
94
130
|
page_config = @config[:pages].find { |page| url =~ Regexp.new(page[:url_pattern]) }
|
95
131
|
next unless page_config
|
132
|
+
next if @downloaded_pages.any? { |page| page[:url] == url }
|
96
133
|
|
97
134
|
pool.post do
|
98
135
|
page = @browser_context.create_page
|
@@ -113,6 +150,13 @@ module Mkwebook
|
|
113
150
|
page.execute("document.title = '#{title}'")
|
114
151
|
end
|
115
152
|
|
153
|
+
if page_link_selector = page_config[:page_link_selector]
|
154
|
+
page_links = page_elements.flat_map do |element|
|
155
|
+
element.css(page_link_selector).map { |a| a.evaluate('this.href') }
|
156
|
+
end.uniq
|
157
|
+
@page_links[url] = page_links
|
158
|
+
end
|
159
|
+
|
116
160
|
page.execute <<-JS
|
117
161
|
for (var e of document.querySelectorAll('[integrity]')) {
|
118
162
|
e.removeAttribute('integrity');
|
@@ -124,18 +168,25 @@ module Mkwebook
|
|
124
168
|
|
125
169
|
page_elements.map do |element|
|
126
170
|
element.css('a').each do |a|
|
127
|
-
u = a.evaluate('this.href')
|
128
|
-
next unless
|
129
|
-
|
130
|
-
|
131
|
-
a.evaluate
|
171
|
+
u = a.evaluate('this.href') rescue nil
|
172
|
+
next unless u.present?
|
173
|
+
href = u.normalize_uri('.html').relative_path_from(url.normalize_uri('.html'))
|
174
|
+
file = u.normalize_file_path('.html')
|
175
|
+
a.evaluate <<~JS
|
176
|
+
(function(that) {
|
177
|
+
that.setAttribute('data-mkwebook-href', '#{href.gsub("'", "\\\\'")}')
|
178
|
+
that.setAttribute('data-mkwebook-file', '#{file.gsub("'", "\\\\'")}')
|
179
|
+
})(this);
|
180
|
+
JS
|
132
181
|
end
|
133
182
|
element.evaluate('this.outerHTML')
|
134
183
|
end.join("\n").tap do |html|
|
135
184
|
FileUtils.mkdir_p(File.dirname(output))
|
136
185
|
File.write(output, html)
|
137
186
|
end
|
138
|
-
|
187
|
+
|
188
|
+
@downloaded_pages << {file: output, url: url}
|
189
|
+
rescue => e
|
139
190
|
$stderr.puts e.message
|
140
191
|
$stderr.puts e.backtrace
|
141
192
|
binding.pry if @cli_options[:pause_on_error]
|
@@ -143,11 +194,30 @@ module Mkwebook
|
|
143
194
|
page.close
|
144
195
|
end
|
145
196
|
end
|
146
|
-
|
147
197
|
end
|
148
198
|
|
149
199
|
pool.shutdown
|
150
200
|
pool.wait_for_termination
|
201
|
+
|
202
|
+
@page_urls = @page_links.flat_map(&:last).uniq
|
203
|
+
@downloaded_depth += 1
|
204
|
+
download_pages
|
205
|
+
end
|
206
|
+
|
207
|
+
# Runs the optional +post_process+ hook from the config.
#
# When the configured value is the path of an existing file, that file is
# executed directly; otherwise the value is treated as an inline script and
# handed to `bash -c`.
#
# Returns nil when no hook is configured, otherwise the result of
# Kernel#system (true on success, false on a non-zero exit, nil when the
# command could not be started). A failure is reported on stderr instead of
# being silently ignored.
def post_process
  script = @config[:post_process]
  return if script.nil?

  succeeded = File.file?(script) ? system(script) : system('bash', '-c', script)
  unless succeeded
    exit_status = Process.last_status&.exitstatus
    $stderr.puts "post-process command failed (exit status: #{exit_status.inspect})"
  end
  succeeded
end
|
216
|
+
|
217
|
+
# Adds the URLs listed under +extra_pages+ in the config to the list of
# pages to download.
#
# Order is preserved (index links first, extras after) and URLs that are
# already queued are not added a second time, so no page is fetched twice.
# Works even when no index links were collected (@page_urls is nil) or when
# +extra_pages+ is absent.
def append_extra_pages
  @page_urls = Array(@page_urls) | Array(@config[:extra_pages])
end
|
152
222
|
|
153
223
|
def download_assets(page, assets_config, page_uri = nil)
|
@@ -170,6 +240,134 @@ module Mkwebook
|
|
170
240
|
end
|
171
241
|
end
|
172
242
|
|
243
|
+
# Builds "<docset.name>.docset" in the working directory from the files
# that are already there.
#
# Steps, in order:
# 1. make sure <name>.docset/Contents/Resources/Documents exists and copy
#    every top-level entry of the working directory (except the docset
#    itself) into it;
# 2. recreate the SQLite search index (docSet.dsidx) from scratch;
# 3. open each copied *.html file whose path matches the +url_pattern+ of
#    one of +docset.pages+ in the browser and evaluate that entry's
#    JavaScript +extractor+; the extractor is expected to return objects
#    with "name", "type" and "path" keys;
# 4. insert the extracted entries into the index (duplicates are ignored);
# 5. write Contents/info.plist and copy the optional PNG icon.
#
# The --limit CLI option caps the number of pages scanned in step 3.
# Terminates the process with status 2 when +docset.icon+ is not a .png.
def make_docset
  docset_config = @config[:docset]
  display_name  = docset_config[:name].to_s
  docset_name   = "#{display_name}.docset"
  doc_path      = "#{docset_name}/Contents/Resources/Documents"
  dsidx_path    = "#{docset_name}/Contents/Resources/docSet.dsidx"
  icon_path     = "#{docset_name}/icon.png"
  info          = "#{docset_name}/Contents/info.plist"

  # mkdir_p is idempotent: always run it so a half-built docset directory
  # (present, but without Documents/) no longer makes the copy below fail.
  docset_existed = Dir.exist?(docset_name)
  FileUtils.mkdir_p(doc_path)
  puts(docset_existed ? 'Docset directory already exist!' : 'Create the docset directory!')

  # Copy files
  FileUtils.cp_r(Dir.glob('*') - [docset_name], doc_path)
  puts 'Copy the HTML documentations!'

  # Init SQLite
  FileUtils.rm_f(dsidx_path)
  db = SQLite3::Database.new(dsidx_path)
  db.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);')
  db.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
  puts 'Create the SQLite Index'

  # Compile every pattern once; the first matching rule wins for a file.
  page_rules = docset_config[:pages].map { |rule| [Regexp.new(rule[:url_pattern]), rule] }
  rule_for = lambda do |file|
    matched = page_rules.find { |pattern, _rule| file =~ pattern }
    matched && matched.last
  end

  pages = Dir.glob("#{doc_path}/**/*.html").select { |file| rule_for.call(file) }
  pages = pages[0, @cli_options[:limit]] if @cli_options[:limit]

  prepare_browser
  page = @browser_context.create_page

  elements = pages.flat_map do |file|
    begin
      page.go_to("file://#{File.expand_path(file)}")
      page.evaluate(rule_for.call(file)[:extractor]) || []
    rescue StandardError => e
      # A page that cannot be processed is reported and contributes nothing.
      puts e.message
      puts e.backtrace
      []
    end
  end

  # One transaction for all rows instead of one implicit commit per insert.
  db.transaction do
    elements.compact.uniq.each do |element|
      name = element['name']
      type = element['type']
      path = element['path']
      # Skip malformed extractor results instead of crashing on nil.
      next unless name && type && path

      # Extractors usually return absolute file:// URLs; keep only the part
      # relative to the docset's Documents directory.
      path = path.sub(%r{.*\.docset/Contents/Resources/Documents}, '')
      db.execute('INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?, ?, ?);', [name, type, path])
    end
  end

  # Falls back to the lower-cased docset name when no keyword is configured.
  keyword = docset_config[:keyword] || display_name.downcase
  plist_content = <<~PLIST
    <?xml version="1.0" encoding="UTF-8"?>
    <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
    <plist version="1.0">
    <dict>
      <key>CFBundleIdentifier</key>
      <string>#{display_name}</string>
      <key>CFBundleName</key>
      <string>#{display_name}</string>
      <key>DashDocSetFamily</key>
      <string>#{display_name}</string>
      <key>DocSetPlatformFamily</key>
      <string>#{keyword}</string>
      <key>isDashDocset</key>
      <true/>
      <key>isJavaScriptEnabled</key>
      <true/>
      <key>dashIndexFilePath</key>
      <string>#{docset_config[:index]}</string>
    </dict>
    </plist>
  PLIST
  File.write(info, plist_content)

  # Add icon
  icon = docset_config[:icon]
  if icon
    if icon.end_with?('.png')
      FileUtils.cp(icon, icon_path)
      puts 'Create the icon for docset!'
    else
      puts '**Error**: icon file should be a valid PNG image!'
      exit(2)
    end
  end
ensure
  # Release the index file handle on every exit path (including exit(2)).
  db&.close
end
|
337
|
+
|
338
|
+
# Prints the contents of entry_types.txt, which sits next to this source
# file, to stdout.
def list_entry_types
  entry_types_file = File.join(__dir__, 'entry_types.txt')
  puts File.read(entry_types_file)
end
|
341
|
+
|
342
|
+
# Rewrites the links inside every downloaded file so that they point at the
# local copies.
#
# Each file recorded in @downloaded_pages is opened from disk in its own
# browser page (on a thread pool sized by the +concurrency+ setting). For
# every <a> that carries a data-mkwebook-href attribute and whose
# data-mkwebook-file names a file that was actually downloaded, +href+ is
# replaced by the data-mkwebook-href value. The file is then overwritten
# with the outerHTML of the page's <html> element.
#
# Errors while processing one file are printed to stderr and do not stop
# the other files; with --pause-on-error a pry session is opened.
def modify_page_links
  pool = Concurrent::FixedThreadPool.new(@config[:concurrency])
  # uniq: the same file must not be rewritten by two workers at once.
  downloaded_files = @downloaded_pages.map { |page| page[:file] }.uniq
  # Hash lookup instead of scanning the array for every link of every page.
  downloaded = downloaded_files.map { |file| [file, true] }.to_h

  downloaded_files.each do |file|
    pool.post do
      page = nil
      begin
        page = @browser_context.create_page
        page.go_to("file://#{File.expand_path(file)}")
        page.css('a').each do |a|
          href = begin
            a.evaluate('this.getAttribute("data-mkwebook-href")')
          rescue StandardError
            nil # node could not be queried: leave this link alone
          end
          next unless href

          target = a.evaluate('this.getAttribute("data-mkwebook-file")')
          next unless target && downloaded.key?(target)

          a.evaluate("this.href = this.getAttribute('data-mkwebook-href')")
        end
        File.write(file, page.evaluate('document.querySelector("html").outerHTML'))
      rescue StandardError => e
        # Rescue broadly: anything that escapes a pool task is lost silently.
        $stderr.puts e.message
        $stderr.puts e.backtrace
        binding.pry if @cli_options[:pause_on_error]
      ensure
        # page is nil when create_page itself failed.
        page&.close
      end
    end
  end

  pool.shutdown
  pool.wait_for_termination
end
|
370
|
+
|
173
371
|
private
|
174
372
|
|
175
373
|
def browser_options
|
data/lib/mkwebook/cli.rb
CHANGED
@@ -9,6 +9,7 @@ module Mkwebook
|
|
9
9
|
end
|
10
10
|
|
11
11
|
class_option :work_dir, :type => :string, :aliases => '-d', :default => '.', :desc => 'Working directory'
|
12
|
+
class_option :headmode, :type => :boolean, :aliases => '-H', :default => nil, :desc => "Headful mode, this option will override the config file's headless setting"
|
12
13
|
class_option :pause_on_error, :type => :boolean, :aliases => '-e', :default => false, :desc => 'Pause on error, this option will force concurrency off'
|
13
14
|
desc 'init', 'Create config file in current directory'
|
14
15
|
def init
|
@@ -16,18 +17,29 @@ module Mkwebook
|
|
16
17
|
end
|
17
18
|
|
18
19
|
option :pause, :type => :boolean, :aliases => '-p', :desc => 'Pause after processing index page'
|
19
|
-
desc '
|
20
|
-
def
|
21
|
-
Mkwebook::App.new(options).
|
20
|
+
desc 'download_index', 'Download and process index page'
|
21
|
+
# CLI command: downloads and processes only the index page.
# The +true+ argument is App#download_index's +only_index+ flag.
def download_index
  app = Mkwebook::App.new(options)
  app.download_index(true)
end
|
23
24
|
|
24
25
|
option :limit, :type => :numeric, :aliases => '-l', :desc => 'Limit number of pages, specially for debugging'
|
25
26
|
option :pause, :type => :boolean, :aliases => '-P', :desc => 'Pause before quit, this option will force concurrency off'
|
26
27
|
option :pause_on_index, :type => :boolean, :aliases => '-p', :desc => 'Pause after processing index page'
|
27
28
|
option :single_thread, :type => :boolean, :aliases => '-s', :desc => 'Force conccurency off'
|
28
|
-
desc '
|
29
|
-
def
|
30
|
-
Mkwebook::App.new(options).
|
29
|
+
desc 'download', 'Download and process html files'
|
30
|
+
# CLI command: runs the full download through App#download with the
# options given on the command line.
def download
  app = Mkwebook::App.new(options)
  app.download
end
|
33
|
+
|
34
|
+
option :limit, :type => :numeric, :aliases => '-l', :desc => 'Limit number of pages, specially for debugging'
|
35
|
+
option :list, :type => :boolean, :aliases => '-L', :desc => 'List all available Dash.app entry types'
|
36
|
+
desc 'docset', 'Create docset'
|
37
|
+
# CLI command: with --list prints the available entry types, otherwise
# builds the docset.
def docset
  app = Mkwebook::App.new(options)
  options[:list] ? app.list_entry_types : app.make_docset
end
|
32
44
|
|
33
45
|
desc 'version', 'Print version'
|
data/lib/mkwebook/concerns.rb
CHANGED
@@ -1 +0,0 @@
|
|
1
|
-
require 'mkwebook/concerns/global_data_definition'
|
data/lib/mkwebook/config.rb
CHANGED
@@ -3,29 +3,32 @@ require 'etc'
|
|
3
3
|
|
4
4
|
module Mkwebook
|
5
5
|
class Config < SimpleDelegator
|
6
|
-
attr_accessor :file, :config
|
6
|
+
attr_accessor :file, :config, :cli_options
|
7
7
|
|
8
|
-
def initialize(
|
8
|
+
def initialize(cli_options = {})
|
9
9
|
super(nil)
|
10
|
+
@cli_options = cli_options
|
10
11
|
@file = find_mkwebook_yaml
|
11
12
|
if @file && File.exist?(@file)
|
12
|
-
@config = load(@file
|
13
|
+
@config = load(@file)
|
13
14
|
__setobj__(@config)
|
14
15
|
else
|
15
16
|
__setobj__(self)
|
16
17
|
end
|
17
18
|
end
|
18
19
|
|
19
|
-
def load(config_file
|
20
|
+
def load(config_file)
|
20
21
|
default_config = {
|
21
22
|
'browser' => {
|
22
|
-
'headless' => true
|
23
|
+
'headless' => true,
|
23
24
|
},
|
24
|
-
'concurrency': 1
|
25
|
+
'concurrency': 1,
|
26
|
+
'max-recursion': 1
|
25
27
|
}
|
26
28
|
config = YAML.load_file(config_file)
|
27
29
|
config = default_config.deep_merge(config).deep_transform_keys! { |k| k.to_s.underscore.to_sym }
|
28
|
-
config[:concurrency] = 1 if
|
30
|
+
config[:concurrency] = 1 if force_single_threaded?
|
31
|
+
config[:browser][:headless] = false if @cli_options[:headmode]
|
29
32
|
config
|
30
33
|
end
|
31
34
|
|
@@ -33,6 +36,10 @@ module Mkwebook
|
|
33
36
|
config[:concurrency].present?
|
34
37
|
end
|
35
38
|
|
39
|
+
# True when the config's authentication section provides a non-blank
# +cookies+ or +local_storage+ entry.
def authentication?
  %i[cookies local_storage].any? do |source|
    config.dig(:authentication, source).present?
  end
end
|
42
|
+
|
36
43
|
def find_mkwebook_yaml
|
37
44
|
dir = Dir.pwd
|
38
45
|
while dir != '/'
|
@@ -45,5 +52,9 @@ module Mkwebook
|
|
45
52
|
dir = File.dirname(dir)
|
46
53
|
end
|
47
54
|
end
|
55
|
+
|
56
|
+
# Truthy when any of the CLI options that require single-threaded
# execution (pause, pause_on_error, single_thread) is set.
def force_single_threaded?
  pause, pause_on_error, single_thread =
    @cli_options.values_at(:pause, :pause_on_error, :single_thread)
  pause || pause_on_error || single_thread
end
|
48
59
|
end
|
49
60
|
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
Annotation
|
2
|
+
Attribute
|
3
|
+
Binding
|
4
|
+
Builtin
|
5
|
+
Callback
|
6
|
+
Category
|
7
|
+
Class
|
8
|
+
Command
|
9
|
+
Component
|
10
|
+
Constant
|
11
|
+
Constructor
|
12
|
+
Define
|
13
|
+
Delegate
|
14
|
+
Diagram
|
15
|
+
Directive
|
16
|
+
Element
|
17
|
+
Entry
|
18
|
+
Enum
|
19
|
+
Environment
|
20
|
+
Error
|
21
|
+
Event
|
22
|
+
Exception
|
23
|
+
Extension
|
24
|
+
Field
|
25
|
+
File
|
26
|
+
Filter
|
27
|
+
Framework
|
28
|
+
Function
|
29
|
+
Global
|
30
|
+
Guide
|
31
|
+
Hook
|
32
|
+
Instance
|
33
|
+
Instruction
|
34
|
+
Interface
|
35
|
+
Keyword
|
36
|
+
Library
|
37
|
+
Literal
|
38
|
+
Macro
|
39
|
+
Method
|
40
|
+
Mixin
|
41
|
+
Modifier
|
42
|
+
Module
|
43
|
+
Namespace
|
44
|
+
Notation
|
45
|
+
Object
|
46
|
+
Operator
|
47
|
+
Option
|
48
|
+
Package
|
49
|
+
Parameter
|
50
|
+
Plugin
|
51
|
+
Procedure
|
52
|
+
Property
|
53
|
+
Protocol
|
54
|
+
Provider
|
55
|
+
Provisioner
|
56
|
+
Query
|
57
|
+
Record
|
58
|
+
Resource
|
59
|
+
Sample
|
60
|
+
Section
|
61
|
+
Service
|
62
|
+
Setting
|
63
|
+
Shortcut
|
64
|
+
Statement
|
65
|
+
Struct
|
66
|
+
Style
|
67
|
+
Subroutine
|
68
|
+
Tag
|
69
|
+
Test
|
70
|
+
Trait
|
71
|
+
Type
|
72
|
+
Union
|
73
|
+
Value
|
74
|
+
Variable
|
75
|
+
Word
|
data/lib/mkwebook/ext/string.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
1
3
|
class String
|
2
4
|
def p
|
3
5
|
puts self
|
@@ -12,24 +14,24 @@ class String
|
|
12
14
|
end
|
13
15
|
|
14
16
|
def normalize_file_path(force_extname = nil)
|
17
|
+
return self unless present?
|
15
18
|
uri = URI.parse(self)
|
16
19
|
file_path = uri.path[1..]
|
17
|
-
extname = File.extname(file_path)
|
20
|
+
extname = force_extname || File.extname(file_path)
|
18
21
|
basename = File.basename(file_path, extname)
|
19
22
|
origin = "#{uri.scheme.try { |s| s + '_' }}#{uri.host}#{uri.port.try { |p| '_' + p.to_s }}"
|
20
23
|
basename += "_#{Digest::MD5.hexdigest(uri.query)}" if uri.query.present?
|
21
|
-
|
22
|
-
File.join(origin, File.dirname(file_path), basename + extname)
|
24
|
+
URI.decode_www_form_component(File.join(origin, File.dirname(file_path), basename + extname))
|
23
25
|
end
|
24
26
|
|
25
27
|
def normalize_uri(force_extname = nil)
|
28
|
+
return self unless present?
|
26
29
|
uri = URI.parse(self)
|
27
30
|
file_path = uri.path[1..]
|
28
|
-
extname = File.extname(file_path)
|
31
|
+
extname = force_extname || File.extname(file_path)
|
29
32
|
basename = File.basename(file_path, extname)
|
30
33
|
basename += "_#{Digest::MD5.hexdigest(uri.query)}" if uri.query.present?
|
31
34
|
origin = "#{uri.scheme.try { |s| s + '_' }}#{uri.host}#{uri.port.try { |p| '_' + p.to_s }}"
|
32
|
-
extname = force_extname if force_extname && extname.empty?
|
33
35
|
file_path = File.join(origin, File.dirname(file_path), basename + extname)
|
34
36
|
if uri.fragment.present?
|
35
37
|
file_path += "##{uri.fragment}"
|
data/lib/mkwebook/version.rb
CHANGED
data/lib/template/mkwebook.yml
CHANGED
@@ -6,6 +6,13 @@ browser: # browser settings
|
|
6
6
|
|
7
7
|
concurrency: 16 # number of concurrent threads, default is no concurrency
|
8
8
|
|
9
|
+
authentication: # authentication settings
|
10
|
+
url: https://example.com/login # URL of a page to open before injecting the cookies and local storage
|
11
|
+
cookies: "auth_cookie_id=demo" # cookie string to be injected
|
12
|
+
local-storage: # local storage to be injected
|
13
|
+
username: demo # key and value
|
14
|
+
auth_token: demo # key and value
|
15
|
+
|
9
16
|
index-page: # index page settings
|
10
17
|
url: https://clojure.org/guides/repl/introduction # URL of index page
|
11
18
|
title: Clojure Guides # title for the book, use page's title if not set
|
@@ -23,6 +30,7 @@ index-page: # index page settings
|
|
23
30
|
- selector: "script[src]"
|
24
31
|
attr: src
|
25
32
|
|
33
|
+
max-recursion: 2 # max depth of recursive downloading
|
26
34
|
|
27
35
|
pages: # settings for content pages
|
28
36
|
- url-pattern: '.*' # URL pattern for content page, only pages' URL matching this pattern will be processed
|
@@ -34,6 +42,7 @@ pages: # settings for content pages
|
|
34
42
|
style.innerHTML = '.clj-content-container { margin-left: 0; }';
|
35
43
|
document.body.appendChild(style);
|
36
44
|
selector: html # CSS selector for the content to be saved
|
45
|
+
page-link-selector: "a:not([href='../guides'])" # links to be downloaded recursively which are extracted from page content
|
37
46
|
assets: # assets to be downloaded
|
38
47
|
- selector: img # CSS selector for assets
|
39
48
|
attr: src # attribute name for the asset URL
|
@@ -41,4 +50,36 @@ pages: # settings for content pages
|
|
41
50
|
attr: href
|
42
51
|
- selector: "script[src]"
|
43
52
|
attr: src
|
53
|
+
|
54
|
+
extra-pages: # settings for extra pages
|
55
|
+
- https://www.example.com/extra-page-1
|
56
|
+
|
57
|
+
post-process: | # Shell script to be executed after the book is downloaded
|
58
|
+
find . -name '*.html' -exec sed -i 's/https:\/\/clojure.org\/guides\/repl\/introduction/..\/index.html/g' {} \;
|
44
59
|
|
60
|
+
docset: # config for generating the docset
|
61
|
+
name: "Clojure Guides" # docset name
|
62
|
+
keyword: "clojure" # docset keyword
|
63
|
+
icon: "clojure.png" # docset icon
|
64
|
+
index: "/index.html" # docset index page
|
65
|
+
pages: # docset pages config
|
66
|
+
- url-pattern: 'index.html' # URL pattern for docset page, will match against local downloaded pages
|
67
|
+
extractor: | # JavaScript code to extract the content for docset page
|
68
|
+
(function() {
|
69
|
+
var links = [...document.querySelectorAll('a.data-url')];
|
70
|
+
return links.map(link => {
|
71
|
+
var name = link.innerText;
|
72
|
+
var tag = link;
|
73
|
+
while (tag.tagName != 'BODY') {
|
74
|
+
tag = tag.parentElement;
|
75
|
+
if (tag.previousElementSibling && tag.previousElementSibling.tagName == 'H1') {
|
76
|
+
name = tag.previousElementSibling.innerText + ' - ' + name;
|
77
|
+
}
|
78
|
+
}
|
79
|
+
return { # Should return an array of objects with name, path, type attributes
|
80
|
+
path: link.href, # path of the page, should be relative to DEMO.docset/Contents/Resources/Documents (starts with /)
|
81
|
+
name: name, # name of extracted element
|
82
|
+
type: 'Guide' # type of extracted element
|
83
|
+
};
|
84
|
+
});
|
85
|
+
})();
|
data/mkwebook.gemspec
CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_dependency 'activesupport', '>= 6.1.5'
|
25
25
|
spec.add_dependency 'concurrent-ruby'
|
26
26
|
spec.add_dependency 'ferrum', '>= 0.13'
|
27
|
+
spec.add_dependency 'sqlite3', '~> 1.5.4'
|
27
28
|
spec.add_dependency 'thor', '>= 1.2.1'
|
28
29
|
|
29
30
|
spec.add_development_dependency 'pry'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mkwebook
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Liu Xiang
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0.13'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sqlite3
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.5.4
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.5.4
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: thor
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -132,8 +146,8 @@ files:
|
|
132
146
|
- lib/mkwebook/cli.rb
|
133
147
|
- lib/mkwebook/commands.rb
|
134
148
|
- lib/mkwebook/concerns.rb
|
135
|
-
- lib/mkwebook/concerns/global_data_definition.rb
|
136
149
|
- lib/mkwebook/config.rb
|
150
|
+
- lib/mkwebook/entry_types.txt
|
137
151
|
- lib/mkwebook/ext.rb
|
138
152
|
- lib/mkwebook/ext/string.rb
|
139
153
|
- lib/mkwebook/version.rb
|
@@ -1,244 +0,0 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
|
-
module Mkwebook
|
4
|
-
module Concerns
|
5
|
-
module GlobalDataDefinition
|
6
|
-
extend ActiveSupport::Concern
|
7
|
-
|
8
|
-
included do
|
9
|
-
|
10
|
-
# Example:
|
11
|
-
#
|
12
|
-
# create_table :post, id: false, primary_key: :id do |t|
|
13
|
-
# t.column :id, :bigint, precison: 19, comment: 'ID'
|
14
|
-
# t.column :name, :string, comment: '名称'
|
15
|
-
# t.column :gmt_created, :datetime, comment: '创建时间'
|
16
|
-
# t.column :gmt_modified, :datetime, comment: '最后修改时间'
|
17
|
-
# end
|
18
|
-
#
|
19
|
-
# Creates a new table with the name +table_name+. +table_name+ may either
|
20
|
-
# be a String or a Symbol.
|
21
|
-
#
|
22
|
-
# There are two ways to work with #create_table. You can use the block
|
23
|
-
# form or the regular form, like this:
|
24
|
-
#
|
25
|
-
# === Block form
|
26
|
-
#
|
27
|
-
# # create_table() passes a TableDefinition object to the block.
|
28
|
-
# # This form will not only create the table, but also columns for the
|
29
|
-
# # table.
|
30
|
-
#
|
31
|
-
# create_table(:suppliers) do |t|
|
32
|
-
# t.column :name, :string, limit: 60
|
33
|
-
# # Other fields here
|
34
|
-
# end
|
35
|
-
#
|
36
|
-
# === Block form, with shorthand
|
37
|
-
#
|
38
|
-
# # You can also use the column types as method calls, rather than calling the column method.
|
39
|
-
# create_table(:suppliers) do |t|
|
40
|
-
# t.string :name, limit: 60
|
41
|
-
# # Other fields here
|
42
|
-
# end
|
43
|
-
#
|
44
|
-
# === Regular form
|
45
|
-
#
|
46
|
-
# # Creates a table called 'suppliers' with no columns.
|
47
|
-
# create_table(:suppliers)
|
48
|
-
# # Add a column to 'suppliers'.
|
49
|
-
# add_column(:suppliers, :name, :string, {limit: 60})
|
50
|
-
#
|
51
|
-
# The +options+ hash can include the following keys:
|
52
|
-
# [<tt>:id</tt>]
|
53
|
-
# Whether to automatically add a primary key column. Defaults to true.
|
54
|
-
# Join tables for {ActiveRecord::Base.has_and_belongs_to_many}[rdoc-ref:Associations::ClassMethods#has_and_belongs_to_many] should set it to false.
|
55
|
-
#
|
56
|
-
# A Symbol can be used to specify the type of the generated primary key column.
|
57
|
-
# [<tt>:primary_key</tt>]
|
58
|
-
# The name of the primary key, if one is to be added automatically.
|
59
|
-
# Defaults to +id+. If <tt>:id</tt> is false, then this option is ignored.
|
60
|
-
#
|
61
|
-
# If an array is passed, a composite primary key will be created.
|
62
|
-
#
|
63
|
-
# Note that Active Record models will automatically detect their
|
64
|
-
# primary key. This can be avoided by using
|
65
|
-
# {self.primary_key=}[rdoc-ref:AttributeMethods::PrimaryKey::ClassMethods#primary_key=] on the model
|
66
|
-
# to define the key explicitly.
|
67
|
-
#
|
68
|
-
# [<tt>:options</tt>]
|
69
|
-
# Any extra options you want appended to the table definition.
|
70
|
-
# [<tt>:temporary</tt>]
|
71
|
-
# Make a temporary table.
|
72
|
-
# [<tt>:force</tt>]
|
73
|
-
# Set to true to drop the table before creating it.
|
74
|
-
# Set to +:cascade+ to drop dependent objects as well.
|
75
|
-
# Defaults to false.
|
76
|
-
# [<tt>:if_not_exists</tt>]
|
77
|
-
# Set to true to avoid raising an error when the table already exists.
|
78
|
-
# Defaults to false.
|
79
|
-
# [<tt>:as</tt>]
|
80
|
-
# SQL to use to generate the table. When this option is used, the block is
|
81
|
-
# ignored, as are the <tt>:id</tt> and <tt>:primary_key</tt> options.
|
82
|
-
#
|
83
|
-
# ====== Add a backend specific option to the generated SQL (MySQL)
|
84
|
-
#
|
85
|
-
# create_table(:suppliers, options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8mb4')
|
86
|
-
#
|
87
|
-
# generates:
|
88
|
-
#
|
89
|
-
# CREATE TABLE suppliers (
|
90
|
-
# id bigint auto_increment PRIMARY KEY
|
91
|
-
# ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
|
92
|
-
#
|
93
|
-
# ====== Rename the primary key column
|
94
|
-
#
|
95
|
-
# create_table(:objects, primary_key: 'guid') do |t|
|
96
|
-
# t.column :name, :string, limit: 80
|
97
|
-
# end
|
98
|
-
#
|
99
|
-
# generates:
|
100
|
-
#
|
101
|
-
# CREATE TABLE objects (
|
102
|
-
# guid bigint auto_increment PRIMARY KEY,
|
103
|
-
# name varchar(80)
|
104
|
-
# )
|
105
|
-
#
|
106
|
-
# ====== Change the primary key column type
|
107
|
-
#
|
108
|
-
# create_table(:tags, id: :string) do |t|
|
109
|
-
# t.column :label, :string
|
110
|
-
# end
|
111
|
-
#
|
112
|
-
# generates:
|
113
|
-
#
|
114
|
-
# CREATE TABLE tags (
|
115
|
-
# id varchar PRIMARY KEY,
|
116
|
-
# label varchar
|
117
|
-
# )
|
118
|
-
#
|
119
|
-
# ====== Create a composite primary key
|
120
|
-
#
|
121
|
-
# create_table(:orders, primary_key: [:product_id, :client_id]) do |t|
|
122
|
-
# t.belongs_to :product
|
123
|
-
# t.belongs_to :client
|
124
|
-
# end
|
125
|
-
#
|
126
|
-
# generates:
|
127
|
-
#
|
128
|
-
# CREATE TABLE order (
|
129
|
-
# product_id bigint NOT NULL,
|
130
|
-
# client_id bigint NOT NULL
|
131
|
-
# );
|
132
|
-
#
|
133
|
-
# ALTER TABLE ONLY "orders"
|
134
|
-
# ADD CONSTRAINT orders_pkey PRIMARY KEY (product_id, client_id);
|
135
|
-
#
|
136
|
-
# ====== Do not add a primary key column
|
137
|
-
#
|
138
|
-
# create_table(:categories_suppliers, id: false) do |t|
|
139
|
-
# t.column :category_id, :bigint
|
140
|
-
# t.column :supplier_id, :bigint
|
141
|
-
# end
|
142
|
-
#
|
143
|
-
# generates:
|
144
|
-
#
|
145
|
-
# CREATE TABLE categories_suppliers (
|
146
|
-
# category_id bigint,
|
147
|
-
# supplier_id bigint
|
148
|
-
# )
|
149
|
-
#
|
150
|
-
# ====== Create a temporary table based on a query
|
151
|
-
#
|
152
|
-
# create_table(:long_query, temporary: true,
|
153
|
-
# as: "SELECT * FROM orders INNER JOIN line_items ON order_id=orders.id")
|
154
|
-
#
|
155
|
-
# generates:
|
156
|
-
#
|
157
|
-
# CREATE TEMPORARY TABLE long_query AS
|
158
|
-
# SELECT * FROM orders INNER JOIN line_items ON order_id=orders.id
|
159
|
-
#
|
160
|
-
# See also TableDefinition#column for details on how to create columns.
|
161
|
-
def create_table(table_name, **options, &blk)
|
162
|
-
ActiveRecord::Base.connection.create_table(table_name, **options, &blk)
|
163
|
-
end
|
164
|
-
|
165
|
-
# Creates a new join table with the name created using the lexical order of the first two
|
166
|
-
# arguments. These arguments can be a String or a Symbol.
|
167
|
-
#
|
168
|
-
# # Creates a table called 'assemblies_parts' with no id.
|
169
|
-
# create_join_table(:assemblies, :parts)
|
170
|
-
#
|
171
|
-
# You can pass an +options+ hash which can include the following keys:
|
172
|
-
# [<tt>:table_name</tt>]
|
173
|
-
# Sets the table name, overriding the default.
|
174
|
-
# [<tt>:column_options</tt>]
|
175
|
-
# Any extra options you want appended to the columns definition.
|
176
|
-
# [<tt>:options</tt>]
|
177
|
-
# Any extra options you want appended to the table definition.
|
178
|
-
# [<tt>:temporary</tt>]
|
179
|
-
# Make a temporary table.
|
180
|
-
# [<tt>:force</tt>]
|
181
|
-
# Set to true to drop the table before creating it.
|
182
|
-
# Defaults to false.
|
183
|
-
#
|
184
|
-
# Note that #create_join_table does not create any indices by default; you can use
|
185
|
-
# its block form to do so yourself:
|
186
|
-
#
|
187
|
-
# create_join_table :products, :categories do |t|
|
188
|
-
# t.index :product_id
|
189
|
-
# t.index :category_id
|
190
|
-
# end
|
191
|
-
#
|
192
|
-
# ====== Add a backend specific option to the generated SQL (MySQL)
|
193
|
-
#
|
194
|
-
# create_join_table(:assemblies, :parts, options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8')
|
195
|
-
#
|
196
|
-
# generates:
|
197
|
-
#
|
198
|
-
# CREATE TABLE assemblies_parts (
|
199
|
-
# assembly_id bigint NOT NULL,
|
200
|
-
# part_id bigint NOT NULL,
|
201
|
-
# ) ENGINE=InnoDB DEFAULT CHARSET=utf8
|
202
|
-
#
|
203
|
-
def create_join_table(table_1, table_2, column_options: {}, **options)
|
204
|
-
ActiveRecord::Base.connection.create_join_table(table_1, table_2, column_options, **options)
|
205
|
-
end
|
206
|
-
|
207
|
-
# Drops a table from the database.
|
208
|
-
#
|
209
|
-
# [<tt>:force</tt>]
|
210
|
-
# Set to +:cascade+ to drop dependent objects as well.
|
211
|
-
# Defaults to false.
|
212
|
-
# [<tt>:if_exists</tt>]
|
213
|
-
# Set to +true+ to only drop the table if it exists.
|
214
|
-
# Defaults to false.
|
215
|
-
#
|
216
|
-
# Although this command ignores most +options+ and the block if one is given,
|
217
|
-
# it can be helpful to provide these in a migration's +change+ method so it can be reverted.
|
218
|
-
# In that case, +options+ and the block will be used by #create_table.
|
219
|
-
def drop_table(table_name, **options)
|
220
|
-
ActiveRecord::Base.connection.drop_table(table_name, **options)
|
221
|
-
end
|
222
|
-
|
223
|
-
# Drops the join table specified by the given arguments.
|
224
|
-
# See #create_join_table for details.
|
225
|
-
#
|
226
|
-
# Although this command ignores the block if one is given, it can be helpful
|
227
|
-
# to provide one in a migration's +change+ method so it can be reverted.
|
228
|
-
# In that case, the block will be used by #create_join_table.
|
229
|
-
def drop_join_table(table_1, table_2, **options)
|
230
|
-
ActiveRecord::Base.connection.drop_join_table(table_1, table_2, **options)
|
231
|
-
end
|
232
|
-
|
233
|
-
# Renames a table.
|
234
|
-
#
|
235
|
-
# rename_table('octopuses', 'octopi')
|
236
|
-
#
|
237
|
-
def rename_table(table_name, new_name)
|
238
|
-
ActiveRecord::Base.connection.rename_table(table_name, new_name)
|
239
|
-
end
|
240
|
-
|
241
|
-
end
|
242
|
-
end
|
243
|
-
end
|
244
|
-
end
|