mkwebook 0.1.0 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 91b2c6fce12ddd620f497fc5a902c000525d8581e34ede7107ac55b2771871e4
4
- data.tar.gz: 11ca2232b8c30848b352737eb9afebafbfda49e0a974b93586627390a524d977
3
+ metadata.gz: a7e29166ba302805e68e70779ef8de58870671aab0ae684d1cec2290f5a0b4bf
4
+ data.tar.gz: 5e530d48d11ce6c26ac5255b7b294b15b6f90bde7b4ecc4e36ee2bc0e0ea7d54
5
5
  SHA512:
6
- metadata.gz: 7aaafd73130c773b6f2b5a942ab525ee95fb84a4a7b01e4ee890edabc1554563e9f3c6dc3fbd36b3321212a50969006e1a9bfd5efb0e0028a92b18bae4df319d
7
- data.tar.gz: c8578b37ba25133d81e487f5486c0eba9c16712d048b64e0bf2336ef28a0310ce5b93994bc43634c692e671d043a1b97a70124d0ddf616e9330ebf669e0c32ea
6
+ metadata.gz: 1b90f0fbd51ad20e65847ca7fde950fc40651c3639a24f28b73c52580547e19e9b93f8e8a60247e3d56046afd2cfb9d758a5903c569b871093c841ad2513a52b
7
+ data.tar.gz: f5f17d96c4700ddd423fffe592a702812049a21e65ee113cccf12ba5c38b3dbc8af9a1307711ec96c03e4757e12a68c586e15497d33ccbf99e036078962e7cca
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- mkwebook (0.1.0)
4
+ mkwebook (0.1.2)
5
5
  activesupport (>= 6.1.5)
6
6
  concurrent-ruby
7
7
  ferrum (>= 0.13)
8
+ sqlite3 (~> 1.5.4)
8
9
  thor (>= 1.2.1)
9
10
 
10
11
  GEM
@@ -40,6 +41,7 @@ GEM
40
41
  yard (~> 0.9.11)
41
42
  public_suffix (5.0.0)
42
43
  rake (12.3.3)
44
+ sqlite3 (1.5.4-x86_64-darwin)
43
45
  thor (1.2.1)
44
46
  tzinfo (2.0.5)
45
47
  concurrent-ruby (~> 1.0)
data/lib/mkwebook/app.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require 'sqlite3'
2
3
  require 'Mkwebook/config'
3
4
  require 'ferrum'
4
5
  require 'pry-byebug'
@@ -14,7 +15,9 @@ module Mkwebook
14
15
  Dir.chdir(cli_options[:work_dir])
15
16
  end
16
17
  @cli_options = cli_options
17
- @config = Mkwebook::Config.new(@cli_options[:pause] || @cli_options[:pause_on_error] || @cli_options[:single_thread])
18
+ @config = Mkwebook::Config.new(@cli_options)
19
+ @downloaded_depth = 0
20
+ @downloaded_pages = []
18
21
  end
19
22
 
20
23
  def create_config
@@ -25,20 +28,42 @@ module Mkwebook
25
28
  File.join(Mkwebook::GEM_ROOT, 'template', 'mkwebook.yml')
26
29
  end
27
30
 
28
- def make
29
- make_index
30
- make_pages
31
+ def download
32
+ download_index
33
+ append_extra_pages
34
+ download_pages
35
+ modify_page_links
36
+ post_process
31
37
  end
32
38
 
33
39
  def prepare_browser
34
40
  @browser = Ferrum::Browser.new(browser_options)
35
41
  @browser_context = browser.contexts.create
42
+ set_auth_info if @config.authentication?
36
43
  end
37
44
 
38
- def make_index
45
+ def set_auth_info
46
+ page = @browser_context.create_page
47
+ page.go_to(@config[:authentication][:url])
48
+ if @config[:authentication][:cookies]
49
+ page.execute("document.cookie = '#{@config[:authentication][:cookies]}'")
50
+ end
51
+
52
+ if @config[:authentication][:local_storage]
53
+ @config[:authentication][:local_storage].each do |key, value|
54
+ page.execute("localStorage.setItem('#{key}', '#{value}')")
55
+ end
56
+ end
57
+ end
58
+
59
+ def download_index(only_index = false)
39
60
  prepare_browser
40
61
  index_page = @browser_context.create_page
62
+ begin
41
63
  index_page.go_to(@config[:index_page][:url])
64
+ rescue Ferrum::PendingConnectionsError => e
65
+ index_page.go_to(@config[:index_page][:url])
66
+ end
42
67
  index_page.network.wait_for_idle(timeout: 10) rescue nil
43
68
  modifier = @config[:index_page][:modifier]
44
69
  if modifier && File.file?(modifier)
@@ -51,8 +76,15 @@ module Mkwebook
51
76
  @page_urls = index_elements.flat_map do |element|
52
77
  url = element.css(@config[:index_page][:link_selector]).map { |a| a.evaluate('this.href') }
53
78
  element.css(@config[:index_page][:link_selector]).each do |a|
54
- u = a.evaluate('this.href').normalize_uri('.html').relative_path_from(@config[:index_page][:output])
55
- a.evaluate("this.href = '#{u}'")
79
+ u = a.evaluate('this.href')
80
+ href = u.normalize_uri('.html').relative_path_from(@config[:index_page][:output])
81
+ file = @config[:index_page][:output]
82
+ a.evaluate <<~JS
83
+ (function(that) {
84
+ that.setAttribute('data-mkwebook-href', '#{href.gsub("'", "\\\\'")}')
85
+ that.setAttribute('data-mkwebook-file', '#{file.gsub("'", "\\\\'")}')
86
+ })(this);
87
+ JS
56
88
  end
57
89
  url
58
90
  end.uniq
@@ -61,9 +93,6 @@ module Mkwebook
61
93
  @config[:pages].any? { |page| url =~ Regexp.new(page[:url_pattern]) }
62
94
  end
63
95
 
64
- @page_urls = @page_urls[0, @cli_options[:limit]] if @cli_options[:limit]
65
-
66
-
67
96
  @config[:index_page][:title].try do |title|
68
97
  index_page.execute("document.title = '#{title}'")
69
98
  end
@@ -82,17 +111,25 @@ module Mkwebook
82
111
  end.join("\n").tap do |html|
83
112
  File.write(@config[:index_page][:output], html)
84
113
  end
114
+ @downloaded_pages << {file: @config[:index_page][:output], url: @config[:index_page][:url]}
115
+ modify_page_links if only_index
85
116
  rescue Ferrum::Error => e
86
117
  binding.pry
87
118
  end
88
119
 
89
- def make_pages
120
+ def download_pages
121
+ return unless @downloaded_depth < @config[:max_recursion]
90
122
 
91
123
  pool = Concurrent::FixedThreadPool.new(@config[:concurrency])
92
124
 
125
+ @page_urls = @page_urls[0, @cli_options[:limit]] if @cli_options[:limit]
126
+
127
+ @page_links = @page_urls.map { |url| [url, []] }.to_h
128
+
93
129
  @page_urls.each do |url|
94
130
  page_config = @config[:pages].find { |page| url =~ Regexp.new(page[:url_pattern]) }
95
131
  next unless page_config
132
+ next if @downloaded_pages.any? { |page| page[:url] == url }
96
133
 
97
134
  pool.post do
98
135
  page = @browser_context.create_page
@@ -113,6 +150,13 @@ module Mkwebook
113
150
  page.execute("document.title = '#{title}'")
114
151
  end
115
152
 
153
+ if page_link_selector = page_config[:page_link_selector]
154
+ page_links = page_elements.flat_map do |element|
155
+ element.css(page_link_selector).map { |a| a.evaluate('this.href') }
156
+ end.uniq
157
+ @page_links[url] = page_links
158
+ end
159
+
116
160
  page.execute <<-JS
117
161
  for (var e of document.querySelectorAll('[integrity]')) {
118
162
  e.removeAttribute('integrity');
@@ -124,18 +168,25 @@ module Mkwebook
124
168
 
125
169
  page_elements.map do |element|
126
170
  element.css('a').each do |a|
127
- u = a.evaluate('this.href')
128
- next unless @page_urls.include?(u)
129
-
130
- u = u.normalize_uri('.html').relative_path_from(url.normalize_uri('.html'))
131
- a.evaluate("this.href = '#{u}'")
171
+ u = a.evaluate('this.href') rescue nil
172
+ next unless u.present?
173
+ href = u.normalize_uri('.html').relative_path_from(url.normalize_uri('.html'))
174
+ file = u.normalize_file_path('.html')
175
+ a.evaluate <<~JS
176
+ (function(that) {
177
+ that.setAttribute('data-mkwebook-href', '#{href.gsub("'", "\\\\'")}')
178
+ that.setAttribute('data-mkwebook-file', '#{file.gsub("'", "\\\\'")}')
179
+ })(this);
180
+ JS
132
181
  end
133
182
  element.evaluate('this.outerHTML')
134
183
  end.join("\n").tap do |html|
135
184
  FileUtils.mkdir_p(File.dirname(output))
136
185
  File.write(output, html)
137
186
  end
138
- rescue Ferrum::Error => e
187
+
188
+ @downloaded_pages << {file: output, url: url}
189
+ rescue => e
139
190
  $stderr.puts e.message
140
191
  $stderr.puts e.backtrace
141
192
  binding.pry if @cli_options[:pause_on_error]
@@ -143,11 +194,30 @@ module Mkwebook
143
194
  page.close
144
195
  end
145
196
  end
146
-
147
197
  end
148
198
 
149
199
  pool.shutdown
150
200
  pool.wait_for_termination
201
+
202
+ @page_urls = @page_links.flat_map(&:last).uniq
203
+ @downloaded_depth += 1
204
+ download_pages
205
+ end
206
+
207
+ def post_process
208
+ @config[:post_process].try do |script|
209
+ if File.file?(script)
210
+ system(script)
211
+ else
212
+ system('bash', '-c', script)
213
+ end
214
+ end
215
+ end
216
+
217
+ def append_extra_pages
218
+ @config[:extra_pages]&.each do |url|
219
+ @page_urls << url
220
+ end
151
221
  end
152
222
 
153
223
  def download_assets(page, assets_config, page_uri = nil)
@@ -170,6 +240,134 @@ module Mkwebook
170
240
  end
171
241
  end
172
242
 
243
+ def make_docset
244
+ docset_config = @config[:docset]
245
+ docset_name = "#{docset_config[:name]}.docset"
246
+ doc_path = "#{docset_name}/Contents/Resources/Documents"
247
+ dsidx_path = "#{docset_name}/Contents/Resources/docSet.dsidx"
248
+ icon_path = "#{docset_name}/icon.png"
249
+ info = "#{docset_name}/Contents/info.plist"
250
+
251
+ if Dir.exist?(docset_name)
252
+ puts 'Docset directory already exist!'
253
+ else
254
+ FileUtils.mkdir_p(doc_path)
255
+ puts "Create the docset directory!"
256
+ end
257
+
258
+ # Copy files
259
+ FileUtils.cp_r(Dir.glob("*") - [docset_name], doc_path)
260
+ puts 'Copy the HTML documentations!'
261
+
262
+ # Init SQLite
263
+
264
+ FileUtils.rm_f(dsidx_path)
265
+ db = SQLite3::Database.new(dsidx_path)
266
+ db.execute <<-SQL
267
+ CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);
268
+ SQL
269
+ db.execute <<-SQL
270
+ CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);
271
+ SQL
272
+ puts 'Create the SQLite Index'
273
+
274
+ pages = Dir.glob("#{doc_path}/**/*.html").select do |file|
275
+ docset_config[:pages].find { |page| file =~ Regexp.new(page[:url_pattern]) }
276
+ end
277
+
278
+ pages = pages[0, @cli_options[:limit]] if @cli_options[:limit]
279
+
280
+ prepare_browser
281
+
282
+ page = @browser_context.create_page
283
+
284
+ elements = pages.flat_map do |file|
285
+ begin
286
+ page.go_to("file://#{File.expand_path(file)}")
287
+ page_config = docset_config[:pages].find { |page| file =~ Regexp.new(page[:url_pattern]) }
288
+ page.evaluate(page_config[:extractor]) || []
289
+ rescue => e
290
+ puts e.message
291
+ puts e.backtrace
292
+ end
293
+ end
294
+
295
+ elements.uniq.compact.each do |element|
296
+ name = element['name']
297
+ type = element['type']
298
+ path = element['path'].sub(%r{.*\.docset/Contents/Resources/Documents}, '')
299
+ db.execute('INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?, ?, ?);', [name, type, path])
300
+ end
301
+
302
+ plist_content = <<-PLIST
303
+ <?xml version="1.0" encoding="UTF-8"?>
304
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
305
+ <plist version="1.0">
306
+ <dict>
307
+ <key>CFBundleIdentifier</key>
308
+ <string>#{docset_name.sub(/.docset/, '')}</string>
309
+ <key>CFBundleName</key>
310
+ <string>#{docset_name.sub(/.docset/, '')}</string>
311
+ <key>DashDocSetFamily</key>
312
+ <string>#{docset_name.sub(/.docset/, '')}</string>
313
+ <key>DocSetPlatformFamily</key>
314
+ <string>#{docset_config[:keyword] || docset_name.downcaseload.sub(/.docset/, '')}</string>
315
+ <key>isDashDocset</key>
316
+ <true/>
317
+ <key>isJavaScriptEnabled</key>
318
+ <true/>
319
+ <key>dashIndexFilePath</key>
320
+ <string>#{docset_config[:index]}</string>
321
+ </dict>
322
+ </plist>
323
+ PLIST
324
+ File.open(info, 'w') { |f| f.write(plist_content)}
325
+
326
+ # Add icon
327
+ if docset_config[:icon]
328
+ if docset_config[:icon].end_with?('.png')
329
+ FileUtils.cp(docset_config[:icon], icon_path)
330
+ puts 'Create the icon for docset!'
331
+ else
332
+ puts '**Error**: icon file should be a valid PNG image!'
333
+ exit(2)
334
+ end
335
+ end
336
+ end
337
+
338
+ def list_entry_types
339
+ puts IO.read("#{__dir__}/entry_types.txt")
340
+ end
341
+
342
+ def modify_page_links
343
+ pool = Concurrent::FixedThreadPool.new(@config[:concurrency])
344
+ downloaded_files = @downloaded_pages.map { |page| page[:file] }
345
+ downloaded_files.each do |file|
346
+ pool.post do
347
+ begin
348
+ page = @browser_context.create_page
349
+ page.go_to("file://#{File.expand_path(file)}")
350
+ page.css('a').each do |a|
351
+ href = a.evaluate('this.getAttribute("data-mkwebook-href")') rescue nil
352
+ next unless href
353
+ f = a.evaluate('this.getAttribute("data-mkwebook-file")')
354
+ next unless href && f && downloaded_files.include?(f)
355
+ a.evaluate("this.href = this.getAttribute('data-mkwebook-href')")
356
+ end
357
+ File.write(file, page.evaluate('document.querySelector("html").outerHTML'))
358
+ rescue Ferrum::Error => e
359
+ $stderr.puts e.message
360
+ $stderr.puts e.backtrace
361
+ binding.pry if @cli_options[:pause_on_error]
362
+ ensure
363
+ page.close
364
+ end
365
+ end
366
+ end
367
+ pool.shutdown
368
+ pool.wait_for_termination
369
+ end
370
+
173
371
  private
174
372
 
175
373
  def browser_options
data/lib/mkwebook/cli.rb CHANGED
@@ -9,6 +9,7 @@ module Mkwebook
9
9
  end
10
10
 
11
11
  class_option :work_dir, :type => :string, :aliases => '-d', :default => '.', :desc => 'Working directory'
12
+ class_option :headmode, :type => :boolean, :aliases => '-H', :default => nil, :desc => "Headful mode, this option will override the config file's headless setting"
12
13
  class_option :pause_on_error, :type => :boolean, :aliases => '-e', :default => false, :desc => 'Pause on error, this option will force concurrency off'
13
14
  desc 'init', 'Create config file in current directory'
14
15
  def init
@@ -16,18 +17,29 @@ module Mkwebook
16
17
  end
17
18
 
18
19
  option :pause, :type => :boolean, :aliases => '-p', :desc => 'Pause after processing index page'
19
- desc 'make_index', 'Download and process index page'
20
- def make_index
21
- Mkwebook::App.new(options).make_index
20
+ desc 'download_index', 'Download and process index page'
21
+ def download_index
22
+ Mkwebook::App.new(options).download_index(true)
22
23
  end
23
24
 
24
25
  option :limit, :type => :numeric, :aliases => '-l', :desc => 'Limit number of pages, specially for debugging'
25
26
  option :pause, :type => :boolean, :aliases => '-P', :desc => 'Pause before quit, this option will force concurrency off'
26
27
  option :pause_on_index, :type => :boolean, :aliases => '-p', :desc => 'Pause after processing index page'
27
28
  option :single_thread, :type => :boolean, :aliases => '-s', :desc => 'Force conccurency off'
28
- desc 'make', 'Download and process html files'
29
- def make
30
- Mkwebook::App.new(options).make
29
+ desc 'download', 'Download and process html files'
30
+ def download
31
+ Mkwebook::App.new(options).download
32
+ end
33
+
34
+ option :limit, :type => :numeric, :aliases => '-l', :desc => 'Limit number of pages, specially for debugging'
35
+ option :list, :type => :boolean, :aliases => '-L', :desc => 'List all available Dash.app entry types'
36
+ desc 'docset', 'Create docset'
37
+ def docset
38
+ if options[:list]
39
+ Mkwebook::App.new(options).list_entry_types
40
+ else
41
+ Mkwebook::App.new(options).make_docset
42
+ end
31
43
  end
32
44
 
33
45
  desc 'version', 'Print version'
@@ -1 +0,0 @@
1
- require 'mkwebook/concerns/global_data_definition'
@@ -3,29 +3,32 @@ require 'etc'
3
3
 
4
4
  module Mkwebook
5
5
  class Config < SimpleDelegator
6
- attr_accessor :file, :config
6
+ attr_accessor :file, :config, :cli_options
7
7
 
8
- def initialize(force_concurrency_off)
8
+ def initialize(cli_options = {})
9
9
  super(nil)
10
+ @cli_options = cli_options
10
11
  @file = find_mkwebook_yaml
11
12
  if @file && File.exist?(@file)
12
- @config = load(@file, force_concurrency_off)
13
+ @config = load(@file)
13
14
  __setobj__(@config)
14
15
  else
15
16
  __setobj__(self)
16
17
  end
17
18
  end
18
19
 
19
- def load(config_file, force_concurrency_off)
20
+ def load(config_file)
20
21
  default_config = {
21
22
  'browser' => {
22
- 'headless' => true
23
+ 'headless' => true,
23
24
  },
24
- 'concurrency': 1
25
+ 'concurrency': 1,
26
+ 'max-recursion': 1
25
27
  }
26
28
  config = YAML.load_file(config_file)
27
29
  config = default_config.deep_merge(config).deep_transform_keys! { |k| k.to_s.underscore.to_sym }
28
- config[:concurrency] = 1 if force_concurrency_off
30
+ config[:concurrency] = 1 if force_single_threaded?
31
+ config[:browser][:headless] = false if @cli_options[:headmode]
29
32
  config
30
33
  end
31
34
 
@@ -33,6 +36,10 @@ module Mkwebook
33
36
  config[:concurrency].present?
34
37
  end
35
38
 
39
+ def authentication?
40
+ config.dig(:authentication, :cookies).present? || config.dig(:authentication, :local_storage).present?
41
+ end
42
+
36
43
  def find_mkwebook_yaml
37
44
  dir = Dir.pwd
38
45
  while dir != '/'
@@ -45,5 +52,9 @@ module Mkwebook
45
52
  dir = File.dirname(dir)
46
53
  end
47
54
  end
55
+
56
+ def force_single_threaded?
57
+ @cli_options[:pause] || @cli_options[:pause_on_error] || @cli_options[:single_thread]
58
+ end
48
59
  end
49
60
  end
@@ -0,0 +1,75 @@
1
+ Annotation
2
+ Attribute
3
+ Binding
4
+ Builtin
5
+ Callback
6
+ Category
7
+ Class
8
+ Command
9
+ Component
10
+ Constant
11
+ Constructor
12
+ Define
13
+ Delegate
14
+ Diagram
15
+ Directive
16
+ Element
17
+ Entry
18
+ Enum
19
+ Environment
20
+ Error
21
+ Event
22
+ Exception
23
+ Extension
24
+ Field
25
+ File
26
+ Filter
27
+ Framework
28
+ Function
29
+ Global
30
+ Guide
31
+ Hook
32
+ Instance
33
+ Instruction
34
+ Interface
35
+ Keyword
36
+ Library
37
+ Literal
38
+ Macro
39
+ Method
40
+ Mixin
41
+ Modifier
42
+ Module
43
+ Namespace
44
+ Notation
45
+ Object
46
+ Operator
47
+ Option
48
+ Package
49
+ Parameter
50
+ Plugin
51
+ Procedure
52
+ Property
53
+ Protocol
54
+ Provider
55
+ Provisioner
56
+ Query
57
+ Record
58
+ Resource
59
+ Sample
60
+ Section
61
+ Service
62
+ Setting
63
+ Shortcut
64
+ Statement
65
+ Struct
66
+ Style
67
+ Subroutine
68
+ Tag
69
+ Test
70
+ Trait
71
+ Type
72
+ Union
73
+ Value
74
+ Variable
75
+ Word
@@ -1,3 +1,5 @@
1
+ require 'uri'
2
+
1
3
  class String
2
4
  def p
3
5
  puts self
@@ -12,24 +14,24 @@ class String
12
14
  end
13
15
 
14
16
  def normalize_file_path(force_extname = nil)
17
+ return self unless present?
15
18
  uri = URI.parse(self)
16
19
  file_path = uri.path[1..]
17
- extname = File.extname(file_path)
20
+ extname = force_extname || File.extname(file_path)
18
21
  basename = File.basename(file_path, extname)
19
22
  origin = "#{uri.scheme.try { |s| s + '_' }}#{uri.host}#{uri.port.try { |p| '_' + p.to_s }}"
20
23
  basename += "_#{Digest::MD5.hexdigest(uri.query)}" if uri.query.present?
21
- extname = force_extname if force_extname && extname.empty?
22
- File.join(origin, File.dirname(file_path), basename + extname)
24
+ URI.decode_www_form_component(File.join(origin, File.dirname(file_path), basename + extname))
23
25
  end
24
26
 
25
27
  def normalize_uri(force_extname = nil)
28
+ return self unless present?
26
29
  uri = URI.parse(self)
27
30
  file_path = uri.path[1..]
28
- extname = File.extname(file_path)
31
+ extname = force_extname || File.extname(file_path)
29
32
  basename = File.basename(file_path, extname)
30
33
  basename += "_#{Digest::MD5.hexdigest(uri.query)}" if uri.query.present?
31
34
  origin = "#{uri.scheme.try { |s| s + '_' }}#{uri.host}#{uri.port.try { |p| '_' + p.to_s }}"
32
- extname = force_extname if force_extname && extname.empty?
33
35
  file_path = File.join(origin, File.dirname(file_path), basename + extname)
34
36
  if uri.fragment.present?
35
37
  file_path += "##{uri.fragment}"
@@ -1,3 +1,3 @@
1
1
  module Mkwebook
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.2"
3
3
  end
@@ -6,6 +6,13 @@ browser: # browser settings
6
6
 
7
7
  concurrency: 16 # number of concurrent threads, default is no concurrency
8
8
 
9
+ authentication: # authentication settings
10
+ url: https://example.com/login # URL of a page to open before injecting the cookies and local storage
11
+ cookies: "auth_cookie_id=demo" # cookie string to be injected
12
+ local-storage: # local storage to be injected
13
+ username: demo # key and value
14
+ auth_token: demo # key and value
15
+
9
16
  index-page: # index page settings
10
17
  url: https://clojure.org/guides/repl/introduction # URL of index page
11
18
  title: Clojure Guides # title for the book, use page's title if not set
@@ -23,6 +30,7 @@ index-page: # index page settings
23
30
  - selector: "script[src]"
24
31
  attr: src
25
32
 
33
+ max-recursion: 2 # max depth of recursive downloading
26
34
 
27
35
  pages: # settings for content pages
28
36
  - url-pattern: '.*' # URL pattern for content page, only pages' URL matching this pattern will be processed
@@ -34,6 +42,7 @@ pages: # settings for content pages
34
42
  style.innerHTML = '.clj-content-container { margin-left: 0; }';
35
43
  document.body.appendChild(style);
36
44
  selector: html # CSS selector for the content to be saved
45
+ page-link-selector: "a:not([href='../guides'])" # links to be downloaded recursively which are extracted from page content
37
46
  assets: # assets to be downloaded
38
47
  - selector: img # CSS selector for assets
39
48
  attr: src # attribute name for the asset URL
@@ -41,4 +50,36 @@ pages: # settings for content pages
41
50
  attr: href
42
51
  - selector: "script[src]"
43
52
  attr: src
53
+
54
+ extra-pages: # settings for extra pages
55
+ - https://www.example.com/extra-page-1
56
+
57
+ post-process: | # Shell script to be executed after the book is downloaded
58
+ find . -name '*.html' -exec sed -i 's/https:\/\/clojure.org\/guides\/repl\/introduction/..\/index.html/g' {} \;
44
59
 
60
+ docset: # config for generating the docset
61
+ name: "Clojure Guides" # docset name
62
+ keyword: "clojure" # docset keyword
63
+ icon: "clojure.png" # docset icon
64
+ index: "/index.html" # docset index page
65
+ pages: # docset pages config
66
+ - url-pattern: 'index.html' # URL pattern for docset page, will match against local downloaded pages
67
+ extractor: | # JavaScript code to extract the content for docset page
68
+ (function() {
69
+ var links = [...document.querySelectorAll('a.data-url')];
70
+ return links.map(link => {
71
+ var name = link.innerText;
72
+ var tag = link;
73
+ while (tag.tagName != 'BODY') {
74
+ tag = tag.parentElement;
75
+ if (tag.previousElementSibling && tag.previousElementSibling.tagName == 'H1') {
76
+ name = tag.previousElementSibling.innerText + ' - ' + name;
77
+ }
78
+ }
79
+ return { // Should return an array of objects with name, path, type attributes
80
+ path: link.href, // path of the page, should be relative to DEMO.docset/Contents/Resources/Documents (starts with /)
81
+ name: name, // name of extracted element
82
+ type: 'Guide' // type of extracted element
83
+ };
84
+ });
85
+ })();
data/mkwebook.gemspec CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency 'activesupport', '>= 6.1.5'
25
25
  spec.add_dependency 'concurrent-ruby'
26
26
  spec.add_dependency 'ferrum', '>= 0.13'
27
+ spec.add_dependency 'sqlite3', '~> 1.5.4'
27
28
  spec.add_dependency 'thor', '>= 1.2.1'
28
29
 
29
30
  spec.add_development_dependency 'pry'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mkwebook
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Liu Xiang
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-09 00:00:00.000000000 Z
11
+ date: 2022-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.13'
55
+ - !ruby/object:Gem::Dependency
56
+ name: sqlite3
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 1.5.4
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 1.5.4
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: thor
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -132,8 +146,8 @@ files:
132
146
  - lib/mkwebook/cli.rb
133
147
  - lib/mkwebook/commands.rb
134
148
  - lib/mkwebook/concerns.rb
135
- - lib/mkwebook/concerns/global_data_definition.rb
136
149
  - lib/mkwebook/config.rb
150
+ - lib/mkwebook/entry_types.txt
137
151
  - lib/mkwebook/ext.rb
138
152
  - lib/mkwebook/ext/string.rb
139
153
  - lib/mkwebook/version.rb
@@ -1,244 +0,0 @@
1
- require 'active_support/concern'
2
-
3
- module Mkwebook
4
- module Concerns
5
- module GlobalDataDefinition
6
- extend ActiveSupport::Concern
7
-
8
- included do
9
-
10
- # Example:
11
- #
12
- # create_table :post, id: false, primary_key: :id do |t|
13
- # t.column :id, :bigint, precison: 19, comment: 'ID'
14
- # t.column :name, :string, comment: '名称'
15
- # t.column :gmt_created, :datetime, comment: '创建时间'
16
- # t.column :gmt_modified, :datetime, comment: '最后修改时间'
17
- # end
18
- #
19
- # Creates a new table with the name +table_name+. +table_name+ may either
20
- # be a String or a Symbol.
21
- #
22
- # There are two ways to work with #create_table. You can use the block
23
- # form or the regular form, like this:
24
- #
25
- # === Block form
26
- #
27
- # # create_table() passes a TableDefinition object to the block.
28
- # # This form will not only create the table, but also columns for the
29
- # # table.
30
- #
31
- # create_table(:suppliers) do |t|
32
- # t.column :name, :string, limit: 60
33
- # # Other fields here
34
- # end
35
- #
36
- # === Block form, with shorthand
37
- #
38
- # # You can also use the column types as method calls, rather than calling the column method.
39
- # create_table(:suppliers) do |t|
40
- # t.string :name, limit: 60
41
- # # Other fields here
42
- # end
43
- #
44
- # === Regular form
45
- #
46
- # # Creates a table called 'suppliers' with no columns.
47
- # create_table(:suppliers)
48
- # # Add a column to 'suppliers'.
49
- # add_column(:suppliers, :name, :string, {limit: 60})
50
- #
51
- # The +options+ hash can include the following keys:
52
- # [<tt>:id</tt>]
53
- # Whether to automatically add a primary key column. Defaults to true.
54
- # Join tables for {ActiveRecord::Base.has_and_belongs_to_many}[rdoc-ref:Associations::ClassMethods#has_and_belongs_to_many] should set it to false.
55
- #
56
- # A Symbol can be used to specify the type of the generated primary key column.
57
- # [<tt>:primary_key</tt>]
58
- # The name of the primary key, if one is to be added automatically.
59
- # Defaults to +id+. If <tt>:id</tt> is false, then this option is ignored.
60
- #
61
- # If an array is passed, a composite primary key will be created.
62
- #
63
- # Note that Active Record models will automatically detect their
64
- # primary key. This can be avoided by using
65
- # {self.primary_key=}[rdoc-ref:AttributeMethods::PrimaryKey::ClassMethods#primary_key=] on the model
66
- # to define the key explicitly.
67
- #
68
- # [<tt>:options</tt>]
69
- # Any extra options you want appended to the table definition.
70
- # [<tt>:temporary</tt>]
71
- # Make a temporary table.
72
- # [<tt>:force</tt>]
73
- # Set to true to drop the table before creating it.
74
- # Set to +:cascade+ to drop dependent objects as well.
75
- # Defaults to false.
76
- # [<tt>:if_not_exists</tt>]
77
- # Set to true to avoid raising an error when the table already exists.
78
- # Defaults to false.
79
- # [<tt>:as</tt>]
80
- # SQL to use to generate the table. When this option is used, the block is
81
- # ignored, as are the <tt>:id</tt> and <tt>:primary_key</tt> options.
82
- #
83
- # ====== Add a backend specific option to the generated SQL (MySQL)
84
- #
85
- # create_table(:suppliers, options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8mb4')
86
- #
87
- # generates:
88
- #
89
- # CREATE TABLE suppliers (
90
- # id bigint auto_increment PRIMARY KEY
91
- # ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
92
- #
93
- # ====== Rename the primary key column
94
- #
95
- # create_table(:objects, primary_key: 'guid') do |t|
96
- # t.column :name, :string, limit: 80
97
- # end
98
- #
99
- # generates:
100
- #
101
- # CREATE TABLE objects (
102
- # guid bigint auto_increment PRIMARY KEY,
103
- # name varchar(80)
104
- # )
105
- #
106
- # ====== Change the primary key column type
107
- #
108
- # create_table(:tags, id: :string) do |t|
109
- # t.column :label, :string
110
- # end
111
- #
112
- # generates:
113
- #
114
- # CREATE TABLE tags (
115
- # id varchar PRIMARY KEY,
116
- # label varchar
117
- # )
118
- #
119
- # ====== Create a composite primary key
120
- #
121
- # create_table(:orders, primary_key: [:product_id, :client_id]) do |t|
122
- # t.belongs_to :product
123
- # t.belongs_to :client
124
- # end
125
- #
126
- # generates:
127
- #
128
- # CREATE TABLE order (
129
- # product_id bigint NOT NULL,
130
- # client_id bigint NOT NULL
131
- # );
132
- #
133
- # ALTER TABLE ONLY "orders"
134
- # ADD CONSTRAINT orders_pkey PRIMARY KEY (product_id, client_id);
135
- #
136
- # ====== Do not add a primary key column
137
- #
138
- # create_table(:categories_suppliers, id: false) do |t|
139
- # t.column :category_id, :bigint
140
- # t.column :supplier_id, :bigint
141
- # end
142
- #
143
- # generates:
144
- #
145
- # CREATE TABLE categories_suppliers (
146
- # category_id bigint,
147
- # supplier_id bigint
148
- # )
149
- #
150
- # ====== Create a temporary table based on a query
151
- #
152
- # create_table(:long_query, temporary: true,
153
- # as: "SELECT * FROM orders INNER JOIN line_items ON order_id=orders.id")
154
- #
155
- # generates:
156
- #
157
- # CREATE TEMPORARY TABLE long_query AS
158
- # SELECT * FROM orders INNER JOIN line_items ON order_id=orders.id
159
- #
160
- # See also TableDefinition#column for details on how to create columns.
161
- def create_table(table_name, **options, &blk)
162
- ActiveRecord::Base.connection.create_table(table_name, **options, &blk)
163
- end
164
-
165
- # Creates a new join table with the name created using the lexical order of the first two
166
- # arguments. These arguments can be a String or a Symbol.
167
- #
168
- # # Creates a table called 'assemblies_parts' with no id.
169
- # create_join_table(:assemblies, :parts)
170
- #
171
- # You can pass an +options+ hash which can include the following keys:
172
- # [<tt>:table_name</tt>]
173
- # Sets the table name, overriding the default.
174
- # [<tt>:column_options</tt>]
175
- # Any extra options you want appended to the columns definition.
176
- # [<tt>:options</tt>]
177
- # Any extra options you want appended to the table definition.
178
- # [<tt>:temporary</tt>]
179
- # Make a temporary table.
180
- # [<tt>:force</tt>]
181
- # Set to true to drop the table before creating it.
182
- # Defaults to false.
183
- #
184
- # Note that #create_join_table does not create any indices by default; you can use
185
- # its block form to do so yourself:
186
- #
187
- # create_join_table :products, :categories do |t|
188
- # t.index :product_id
189
- # t.index :category_id
190
- # end
191
- #
192
- # ====== Add a backend specific option to the generated SQL (MySQL)
193
- #
194
- # create_join_table(:assemblies, :parts, options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8')
195
- #
196
- # generates:
197
- #
198
- # CREATE TABLE assemblies_parts (
199
- # assembly_id bigint NOT NULL,
200
- # part_id bigint NOT NULL,
201
- # ) ENGINE=InnoDB DEFAULT CHARSET=utf8
202
- #
203
- def create_join_table(table_1, table_2, column_options: {}, **options)
204
- ActiveRecord::Base.connection.create_join_table(table_1, table_2, column_options, **options)
205
- end
206
-
207
- # Drops a table from the database.
208
- #
209
- # [<tt>:force</tt>]
210
- # Set to +:cascade+ to drop dependent objects as well.
211
- # Defaults to false.
212
- # [<tt>:if_exists</tt>]
213
- # Set to +true+ to only drop the table if it exists.
214
- # Defaults to false.
215
- #
216
- # Although this command ignores most +options+ and the block if one is given,
217
- # it can be helpful to provide these in a migration's +change+ method so it can be reverted.
218
- # In that case, +options+ and the block will be used by #create_table.
219
- def drop_table(table_name, **options)
220
- ActiveRecord::Base.connection.drop_table(table_name, **options)
221
- end
222
-
223
- # Drops the join table specified by the given arguments.
224
- # See #create_join_table for details.
225
- #
226
- # Although this command ignores the block if one is given, it can be helpful
227
- # to provide one in a migration's +change+ method so it can be reverted.
228
- # In that case, the block will be used by #create_join_table.
229
- def drop_join_table(table_1, table_2, **options)
230
- ActiveRecord::Base.connection.drop_join_table(table_1, table_2, **options)
231
- end
232
-
233
- # Renames a table.
234
- #
235
- # rename_table('octopuses', 'octopi')
236
- #
237
- def rename_table(table_name, new_name)
238
- ActiveRecord::Base.connection.rename_table(table_name, new_name)
239
- end
240
-
241
- end
242
- end
243
- end
244
- end