mkwebook 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 91b2c6fce12ddd620f497fc5a902c000525d8581e34ede7107ac55b2771871e4
4
- data.tar.gz: 11ca2232b8c30848b352737eb9afebafbfda49e0a974b93586627390a524d977
3
+ metadata.gz: a7e29166ba302805e68e70779ef8de58870671aab0ae684d1cec2290f5a0b4bf
4
+ data.tar.gz: 5e530d48d11ce6c26ac5255b7b294b15b6f90bde7b4ecc4e36ee2bc0e0ea7d54
5
5
  SHA512:
6
- metadata.gz: 7aaafd73130c773b6f2b5a942ab525ee95fb84a4a7b01e4ee890edabc1554563e9f3c6dc3fbd36b3321212a50969006e1a9bfd5efb0e0028a92b18bae4df319d
7
- data.tar.gz: c8578b37ba25133d81e487f5486c0eba9c16712d048b64e0bf2336ef28a0310ce5b93994bc43634c692e671d043a1b97a70124d0ddf616e9330ebf669e0c32ea
6
+ metadata.gz: 1b90f0fbd51ad20e65847ca7fde950fc40651c3639a24f28b73c52580547e19e9b93f8e8a60247e3d56046afd2cfb9d758a5903c569b871093c841ad2513a52b
7
+ data.tar.gz: f5f17d96c4700ddd423fffe592a702812049a21e65ee113cccf12ba5c38b3dbc8af9a1307711ec96c03e4757e12a68c586e15497d33ccbf99e036078962e7cca
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- mkwebook (0.1.0)
4
+ mkwebook (0.1.2)
5
5
  activesupport (>= 6.1.5)
6
6
  concurrent-ruby
7
7
  ferrum (>= 0.13)
8
+ sqlite3 (~> 1.5.4)
8
9
  thor (>= 1.2.1)
9
10
 
10
11
  GEM
@@ -40,6 +41,7 @@ GEM
40
41
  yard (~> 0.9.11)
41
42
  public_suffix (5.0.0)
42
43
  rake (12.3.3)
44
+ sqlite3 (1.5.4-x86_64-darwin)
43
45
  thor (1.2.1)
44
46
  tzinfo (2.0.5)
45
47
  concurrent-ruby (~> 1.0)
data/lib/mkwebook/app.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require 'sqlite3'
2
3
  require 'Mkwebook/config'
3
4
  require 'ferrum'
4
5
  require 'pry-byebug'
@@ -14,7 +15,9 @@ module Mkwebook
14
15
  Dir.chdir(cli_options[:work_dir])
15
16
  end
16
17
  @cli_options = cli_options
17
- @config = Mkwebook::Config.new(@cli_options[:pause] || @cli_options[:pause_on_error] || @cli_options[:single_thread])
18
+ @config = Mkwebook::Config.new(@cli_options)
19
+ @downloaded_depth = 0
20
+ @downloaded_pages = []
18
21
  end
19
22
 
20
23
  def create_config
@@ -25,20 +28,42 @@ module Mkwebook
25
28
  File.join(Mkwebook::GEM_ROOT, 'template', 'mkwebook.yml')
26
29
  end
27
30
 
28
- def make
29
- make_index
30
- make_pages
31
# Runs the whole download pipeline in a fixed order: fetch the index page,
# add the configured extra pages, download the content pages, rewrite the
# links between the saved files and finally run the post-process step.
# Returns whatever the last stage (post_process) returns.
def download
  stages = %i[download_index append_extra_pages download_pages modify_page_links post_process]
  stages.map { |stage| send(stage) }.last
end
32
38
 
33
39
  def prepare_browser
34
40
  @browser = Ferrum::Browser.new(browser_options)
35
41
  @browser_context = browser.contexts.create
42
+ set_auth_info if @config.authentication?
36
43
  end
37
44
 
38
- def make_index
45
# Injects the credentials from the +authentication+ section of the config
# into the browser context (presumably so that pages opened later in the
# same context are already authenticated).
#
# Opens a new page in @browser_context, navigates to the configured
# authentication URL, then sets the configured cookie string and every
# configured local-storage key/value pair on that page.
#
# The values are passed to the page as script arguments instead of being
# interpolated into the JavaScript source, so a quote, backslash or newline
# inside a cookie or token can no longer break (or inject code into) the
# script that is executed.
def set_auth_info
  auth = @config[:authentication]
  page = @browser_context.create_page
  page.go_to(auth[:url])

  cookies = auth[:cookies]
  page.execute('document.cookie = arguments[0]', cookies.to_s) if cookies

  auth[:local_storage]&.each do |key, value|
    # Stringify both sides: the previous string interpolation always produced
    # strings, and keys may be Symbols after the config keys are normalised.
    page.execute('localStorage.setItem(arguments[0], arguments[1])', key.to_s, value.to_s)
  end
end
58
+
59
+ def download_index(only_index = false)
39
60
  prepare_browser
40
61
  index_page = @browser_context.create_page
62
+ begin
41
63
  index_page.go_to(@config[:index_page][:url])
64
+ rescue Ferrum::PendingConnectionsError => e
65
+ index_page.go_to(@config[:index_page][:url])
66
+ end
42
67
  index_page.network.wait_for_idle(timeout: 10) rescue nil
43
68
  modifier = @config[:index_page][:modifier]
44
69
  if modifier && File.file?(modifier)
@@ -51,8 +76,15 @@ module Mkwebook
51
76
  @page_urls = index_elements.flat_map do |element|
52
77
  url = element.css(@config[:index_page][:link_selector]).map { |a| a.evaluate('this.href') }
53
78
  element.css(@config[:index_page][:link_selector]).each do |a|
54
- u = a.evaluate('this.href').normalize_uri('.html').relative_path_from(@config[:index_page][:output])
55
- a.evaluate("this.href = '#{u}'")
79
+ u = a.evaluate('this.href')
80
+ href = u.normalize_uri('.html').relative_path_from(@config[:index_page][:output])
81
+ file = @config[:index_page][:output]
82
+ a.evaluate <<~JS
83
+ (function(that) {
84
+ that.setAttribute('data-mkwebook-href', '#{href.gsub("'", "\\\\'")}')
85
+ that.setAttribute('data-mkwebook-file', '#{file.gsub("'", "\\\\'")}')
86
+ })(this);
87
+ JS
56
88
  end
57
89
  url
58
90
  end.uniq
@@ -61,9 +93,6 @@ module Mkwebook
61
93
  @config[:pages].any? { |page| url =~ Regexp.new(page[:url_pattern]) }
62
94
  end
63
95
 
64
- @page_urls = @page_urls[0, @cli_options[:limit]] if @cli_options[:limit]
65
-
66
-
67
96
  @config[:index_page][:title].try do |title|
68
97
  index_page.execute("document.title = '#{title}'")
69
98
  end
@@ -82,17 +111,25 @@ module Mkwebook
82
111
  end.join("\n").tap do |html|
83
112
  File.write(@config[:index_page][:output], html)
84
113
  end
114
+ @downloaded_pages << {file: @config[:index_page][:output], url: @config[:index_page][:url]}
115
+ modify_page_links if only_index
85
116
  rescue Ferrum::Error => e
86
117
  binding.pry
87
118
  end
88
119
 
89
- def make_pages
120
+ def download_pages
121
+ return unless @downloaded_depth < @config[:max_recursion]
90
122
 
91
123
  pool = Concurrent::FixedThreadPool.new(@config[:concurrency])
92
124
 
125
+ @page_urls = @page_urls[0, @cli_options[:limit]] if @cli_options[:limit]
126
+
127
+ @page_links = @page_urls.map { |url| [url, []] }.to_h
128
+
93
129
  @page_urls.each do |url|
94
130
  page_config = @config[:pages].find { |page| url =~ Regexp.new(page[:url_pattern]) }
95
131
  next unless page_config
132
+ next if @downloaded_pages.any? { |page| page[:url] == url }
96
133
 
97
134
  pool.post do
98
135
  page = @browser_context.create_page
@@ -113,6 +150,13 @@ module Mkwebook
113
150
  page.execute("document.title = '#{title}'")
114
151
  end
115
152
 
153
+ if page_link_selector = page_config[:page_link_selector]
154
+ page_links = page_elements.flat_map do |element|
155
+ element.css(page_link_selector).map { |a| a.evaluate('this.href') }
156
+ end.uniq
157
+ @page_links[url] = page_links
158
+ end
159
+
116
160
  page.execute <<-JS
117
161
  for (var e of document.querySelectorAll('[integrity]')) {
118
162
  e.removeAttribute('integrity');
@@ -124,18 +168,25 @@ module Mkwebook
124
168
 
125
169
  page_elements.map do |element|
126
170
  element.css('a').each do |a|
127
- u = a.evaluate('this.href')
128
- next unless @page_urls.include?(u)
129
-
130
- u = u.normalize_uri('.html').relative_path_from(url.normalize_uri('.html'))
131
- a.evaluate("this.href = '#{u}'")
171
+ u = a.evaluate('this.href') rescue nil
172
+ next unless u.present?
173
+ href = u.normalize_uri('.html').relative_path_from(url.normalize_uri('.html'))
174
+ file = u.normalize_file_path('.html')
175
+ a.evaluate <<~JS
176
+ (function(that) {
177
+ that.setAttribute('data-mkwebook-href', '#{href.gsub("'", "\\\\'")}')
178
+ that.setAttribute('data-mkwebook-file', '#{file.gsub("'", "\\\\'")}')
179
+ })(this);
180
+ JS
132
181
  end
133
182
  element.evaluate('this.outerHTML')
134
183
  end.join("\n").tap do |html|
135
184
  FileUtils.mkdir_p(File.dirname(output))
136
185
  File.write(output, html)
137
186
  end
138
- rescue Ferrum::Error => e
187
+
188
+ @downloaded_pages << {file: output, url: url}
189
+ rescue => e
139
190
  $stderr.puts e.message
140
191
  $stderr.puts e.backtrace
141
192
  binding.pry if @cli_options[:pause_on_error]
@@ -143,11 +194,30 @@ module Mkwebook
143
194
  page.close
144
195
  end
145
196
  end
146
-
147
197
  end
148
198
 
149
199
  pool.shutdown
150
200
  pool.wait_for_termination
201
+
202
+ @page_urls = @page_links.flat_map(&:last).uniq
203
+ @downloaded_depth += 1
204
+ download_pages
205
+ end
206
+
207
# Runs the optional +post_process+ entry of the config once downloading is
# done. When the entry names an existing file it is executed directly;
# otherwise its text is handed to `bash -c` as an inline script. Does
# nothing when the entry is not configured.
def post_process
  @config[:post_process].try do |command|
    argv = File.file?(command) ? [command] : ['bash', '-c', command]
    system(*argv)
  end
end
216
+
217
# Appends every URL listed under +extra_pages+ in the config to @page_urls.
# Does nothing when no extra pages are configured.
def append_extra_pages
  extras = @config[:extra_pages]
  return if extras.nil?

  extras.each { |extra_url| @page_urls.push(extra_url) }
end
152
222
 
153
223
  def download_assets(page, assets_config, page_uri = nil)
@@ -170,6 +240,134 @@ module Mkwebook
170
240
  end
171
241
  end
172
242
 
243
# Packages the files of the current directory as a Dash docset.
#
# Steps, in order:
# 1. create "<name>.docset/Contents/Resources/Documents" and copy every
#    entry of the current directory (except the docset itself) into it;
# 2. recreate the SQLite search index (docSet.dsidx) from scratch;
# 3. load each copied HTML file whose path matches a +url_pattern+ from
#    docset_config[:pages] in the browser and evaluate that entry's
#    +extractor+ JavaScript; the result is used as a list of objects with
#    'name', 'type' and 'path' keys;
# 4. insert the extracted entries into the searchIndex table;
# 5. write Contents/info.plist and, when configured, copy the icon.
#
# Reads @config[:docset] (name, keyword, index, icon, pages) and
# @cli_options[:limit]. Calls exit(2) when the configured icon does not end
# with ".png".
def make_docset
  docset_config = @config[:docset]
  # Keep the bare name around instead of stripping ".docset" back off the
  # directory name with an unanchored regexp (which could also eat part of a
  # name that merely contains "docset").
  bundle_name = docset_config[:name].to_s
  docset_name = "#{bundle_name}.docset"
  doc_path = "#{docset_name}/Contents/Resources/Documents"
  dsidx_path = "#{docset_name}/Contents/Resources/docSet.dsidx"
  icon_path = "#{docset_name}/icon.png"
  info = "#{docset_name}/Contents/info.plist"

  if Dir.exist?(docset_name)
    puts 'Docset directory already exist!'
  else
    puts "Create the docset directory!"
  end
  # mkdir_p is a no-op for existing directories, so run it unconditionally:
  # a left-over docset directory without the Documents tree used to make the
  # copy below fail.
  FileUtils.mkdir_p(doc_path)

  # Copy files
  FileUtils.cp_r(Dir.glob("*") - [docset_name], doc_path)
  puts 'Copy the HTML documentations!'

  # Init SQLite: the index is always rebuilt from an empty database file.
  FileUtils.rm_f(dsidx_path)
  db = SQLite3::Database.new(dsidx_path)
  begin
    db.execute 'CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);'
    db.execute 'CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);'
    puts 'Create the SQLite Index'

    # Compile each url_pattern once instead of once per file.
    page_rules = Array(docset_config[:pages]).map { |rule| [Regexp.new(rule[:url_pattern]), rule] }

    pages = Dir.glob("#{doc_path}/**/*.html").select do |file|
      page_rules.any? { |pattern, _rule| file =~ pattern }
    end
    pages = pages[0, @cli_options[:limit]] if @cli_options[:limit]

    prepare_browser
    page = @browser_context.create_page

    elements = pages.flat_map do |file|
      begin
        page.go_to("file://#{File.expand_path(file)}")
        _pattern, rule = page_rules.find { |pattern, _rule| file =~ pattern }
        page.evaluate(rule[:extractor]) || []
      rescue => e
        # One broken page must not abort the whole docset: report and skip it.
        puts e.message
        puts e.backtrace
        []
      end
    end

    elements.uniq.compact.each do |element|
      name = element['name']
      type = element['type']
      # Extractors typically return absolute file:// URLs; keep only the part
      # below the Documents directory.
      path = element['path'].to_s.sub(%r{.*\.docset/Contents/Resources/Documents}, '')
      db.execute('INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?, ?, ?);', [name, type, path])
    end
  ensure
    # Release the database handle even when indexing fails.
    db.close
  end

  # The squiggly heredoc strips the source indentation so the XML declaration
  # is the very first thing in the file.
  # NOTE(review): the interpolated values are not XML-escaped; a name
  # containing '&' or '<' would produce an invalid plist.
  plist_content = <<~PLIST
    <?xml version="1.0" encoding="UTF-8"?>
    <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
    <plist version="1.0">
    <dict>
    <key>CFBundleIdentifier</key>
    <string>#{bundle_name}</string>
    <key>CFBundleName</key>
    <string>#{bundle_name}</string>
    <key>DashDocSetFamily</key>
    <string>#{bundle_name}</string>
    <key>DocSetPlatformFamily</key>
    <string>#{docset_config[:keyword] || bundle_name.downcase}</string>
    <key>isDashDocset</key>
    <true/>
    <key>isJavaScriptEnabled</key>
    <true/>
    <key>dashIndexFilePath</key>
    <string>#{docset_config[:index]}</string>
    </dict>
    </plist>
  PLIST
  File.write(info, plist_content)

  # Add icon
  if docset_config[:icon]
    if docset_config[:icon].end_with?('.png')
      FileUtils.cp(docset_config[:icon], icon_path)
      puts 'Create the icon for docset!'
    else
      puts '**Error**: icon file should be a valid PNG image!'
      exit(2)
    end
  end
end
337
+
338
# Prints the contents of entry_types.txt, which sits in the same directory
# as this source file, to standard output.
def list_entry_types
  types_file = "#{__dir__}/entry_types.txt"
  $stdout.puts(File.read(types_file))
end
341
+
342
# Second pass over every downloaded file: activates the links whose target
# was actually downloaded.
#
# Each saved file is reopened in the browser. For every <a> element that
# carries a data-mkwebook-href attribute and whose data-mkwebook-file value
# is one of the downloaded files, href is replaced with the
# data-mkwebook-href value. The <html> element's outerHTML is then written
# back over the file. Work is spread over a thread pool sized by
# @config[:concurrency].
#
# Any error raised while processing a file is reported on stderr (and opens
# a pry session when @cli_options[:pause_on_error] is set) instead of
# escaping from the pool task, in line with how download_pages reports
# failures.
def modify_page_links
  pool = Concurrent::FixedThreadPool.new(@config[:concurrency])
  downloaded_files = @downloaded_pages.map { |page| page[:file] }
  # Hash lookup instead of scanning the Array once per link.
  known_files = downloaded_files.each_with_object({}) { |file, seen| seen[file] = true }

  downloaded_files.each do |file|
    pool.post do
      page = nil
      begin
        page = @browser_context.create_page
        page.go_to("file://#{File.expand_path(file)}")
        page.css('a').each do |a|
          href = begin
            a.evaluate('this.getAttribute("data-mkwebook-href")')
          rescue StandardError
            nil # best effort: a link that cannot be inspected is left untouched
          end
          next unless href

          f = a.evaluate('this.getAttribute("data-mkwebook-file")')
          next unless f && known_files.key?(f)

          a.evaluate("this.href = this.getAttribute('data-mkwebook-href')")
        end
        File.write(file, page.evaluate('document.querySelector("html").outerHTML'))
      rescue StandardError => e
        $stderr.puts e.message
        $stderr.puts e.backtrace
        binding.pry if @cli_options[:pause_on_error]
      ensure
        # page is still nil when create_page itself failed.
        page&.close
      end
    end
  end
  pool.shutdown
  pool.wait_for_termination
end
370
+
173
371
  private
174
372
 
175
373
  def browser_options
data/lib/mkwebook/cli.rb CHANGED
@@ -9,6 +9,7 @@ module Mkwebook
9
9
  end
10
10
 
11
11
  class_option :work_dir, :type => :string, :aliases => '-d', :default => '.', :desc => 'Working directory'
12
+ class_option :headmode, :type => :boolean, :aliases => '-H', :default => nil, :desc => "Headful mode, this option will override the config file's headless setting"
12
13
  class_option :pause_on_error, :type => :boolean, :aliases => '-e', :default => false, :desc => 'Pause on error, this option will force concurrency off'
13
14
  desc 'init', 'Create config file in current directory'
14
15
  def init
@@ -16,18 +17,29 @@ module Mkwebook
16
17
  end
17
18
 
18
19
  option :pause, :type => :boolean, :aliases => '-p', :desc => 'Pause after processing index page'
19
- desc 'make_index', 'Download and process index page'
20
- def make_index
21
- Mkwebook::App.new(options).make_index
20
+ desc 'download_index', 'Download and process index page'
21
# CLI command: builds an App from the parsed options and downloads only the
# index page (the +true+ argument is App#download_index's only_index flag).
def download_index
  app = Mkwebook::App.new(options)
  app.download_index(true)
end
23
24
 
24
25
  option :limit, :type => :numeric, :aliases => '-l', :desc => 'Limit number of pages, specially for debugging'
25
26
  option :pause, :type => :boolean, :aliases => '-P', :desc => 'Pause before quit, this option will force concurrency off'
26
27
  option :pause_on_index, :type => :boolean, :aliases => '-p', :desc => 'Pause after processing index page'
27
28
  option :single_thread, :type => :boolean, :aliases => '-s', :desc => 'Force conccurency off'
28
- desc 'make', 'Download and process html files'
29
- def make
30
- Mkwebook::App.new(options).make
29
+ desc 'download', 'Download and process html files'
30
# CLI command: builds an App from the parsed options and runs its full
# download.
def download
  app = Mkwebook::App.new(options)
  app.download
end
33
+
34
+ option :limit, :type => :numeric, :aliases => '-l', :desc => 'Limit number of pages, specially for debugging'
35
+ option :list, :type => :boolean, :aliases => '-L', :desc => 'List all available Dash.app entry types'
36
+ desc 'docset', 'Create docset'
37
# CLI command: with the +list+ option it prints the available entry types,
# otherwise it builds the docset. Either way a fresh App is created from the
# parsed options.
def docset
  action = options[:list] ? :list_entry_types : :make_docset
  Mkwebook::App.new(options).public_send(action)
end
32
44
 
33
45
  desc 'version', 'Print version'
@@ -1 +0,0 @@
1
- require 'mkwebook/concerns/global_data_definition'
@@ -3,29 +3,32 @@ require 'etc'
3
3
 
4
4
  module Mkwebook
5
5
  class Config < SimpleDelegator
6
- attr_accessor :file, :config
6
+ attr_accessor :file, :config, :cli_options
7
7
 
8
- def initialize(force_concurrency_off)
8
+ def initialize(cli_options = {})
9
9
  super(nil)
10
+ @cli_options = cli_options
10
11
  @file = find_mkwebook_yaml
11
12
  if @file && File.exist?(@file)
12
- @config = load(@file, force_concurrency_off)
13
+ @config = load(@file)
13
14
  __setobj__(@config)
14
15
  else
15
16
  __setobj__(self)
16
17
  end
17
18
  end
18
19
 
19
- def load(config_file, force_concurrency_off)
20
+ def load(config_file)
20
21
  default_config = {
21
22
  'browser' => {
22
- 'headless' => true
23
+ 'headless' => true,
23
24
  },
24
- 'concurrency': 1
25
+ 'concurrency': 1,
26
+ 'max-recursion': 1
25
27
  }
26
28
  config = YAML.load_file(config_file)
27
29
  config = default_config.deep_merge(config).deep_transform_keys! { |k| k.to_s.underscore.to_sym }
28
- config[:concurrency] = 1 if force_concurrency_off
30
+ config[:concurrency] = 1 if force_single_threaded?
31
+ config[:browser][:headless] = false if @cli_options[:headmode]
29
32
  config
30
33
  end
31
34
 
@@ -33,6 +36,10 @@ module Mkwebook
33
36
  config[:concurrency].present?
34
37
  end
35
38
 
39
# True when the loaded config provides something to inject for
# authentication: a non-blank +cookies+ or +local_storage+ entry under
# +authentication+.
def authentication?
  %i[cookies local_storage].any? { |key| config.dig(:authentication, key).present? }
end
42
+
36
43
  def find_mkwebook_yaml
37
44
  dir = Dir.pwd
38
45
  while dir != '/'
@@ -45,5 +52,9 @@ module Mkwebook
45
52
  dir = File.dirname(dir)
46
53
  end
47
54
  end
55
+
56
# Truthy when any CLI option that requires single-threaded operation is set:
# +pause+, +pause_on_error+ or +single_thread+. Yields the first truthy
# option value, or the +single_thread+ value when none is truthy.
def force_single_threaded?
  flags = %i[pause pause_on_error single_thread].map { |name| @cli_options[name] }
  flags.find(&:itself) || flags.last
end
48
59
  end
49
60
  end
@@ -0,0 +1,75 @@
1
+ Annotation
2
+ Attribute
3
+ Binding
4
+ Builtin
5
+ Callback
6
+ Category
7
+ Class
8
+ Command
9
+ Component
10
+ Constant
11
+ Constructor
12
+ Define
13
+ Delegate
14
+ Diagram
15
+ Directive
16
+ Element
17
+ Entry
18
+ Enum
19
+ Environment
20
+ Error
21
+ Event
22
+ Exception
23
+ Extension
24
+ Field
25
+ File
26
+ Filter
27
+ Framework
28
+ Function
29
+ Global
30
+ Guide
31
+ Hook
32
+ Instance
33
+ Instruction
34
+ Interface
35
+ Keyword
36
+ Library
37
+ Literal
38
+ Macro
39
+ Method
40
+ Mixin
41
+ Modifier
42
+ Module
43
+ Namespace
44
+ Notation
45
+ Object
46
+ Operator
47
+ Option
48
+ Package
49
+ Parameter
50
+ Plugin
51
+ Procedure
52
+ Property
53
+ Protocol
54
+ Provider
55
+ Provisioner
56
+ Query
57
+ Record
58
+ Resource
59
+ Sample
60
+ Section
61
+ Service
62
+ Setting
63
+ Shortcut
64
+ Statement
65
+ Struct
66
+ Style
67
+ Subroutine
68
+ Tag
69
+ Test
70
+ Trait
71
+ Type
72
+ Union
73
+ Value
74
+ Variable
75
+ Word
@@ -1,3 +1,5 @@
1
+ require 'uri'
2
+
1
3
  class String
2
4
  def p
3
5
  puts self
@@ -12,24 +14,24 @@ class String
12
14
  end
13
15
 
14
16
  def normalize_file_path(force_extname = nil)
17
+ return self unless present?
15
18
  uri = URI.parse(self)
16
19
  file_path = uri.path[1..]
17
- extname = File.extname(file_path)
20
+ extname = force_extname || File.extname(file_path)
18
21
  basename = File.basename(file_path, extname)
19
22
  origin = "#{uri.scheme.try { |s| s + '_' }}#{uri.host}#{uri.port.try { |p| '_' + p.to_s }}"
20
23
  basename += "_#{Digest::MD5.hexdigest(uri.query)}" if uri.query.present?
21
- extname = force_extname if force_extname && extname.empty?
22
- File.join(origin, File.dirname(file_path), basename + extname)
24
+ URI.decode_www_form_component(File.join(origin, File.dirname(file_path), basename + extname))
23
25
  end
24
26
 
25
27
  def normalize_uri(force_extname = nil)
28
+ return self unless present?
26
29
  uri = URI.parse(self)
27
30
  file_path = uri.path[1..]
28
- extname = File.extname(file_path)
31
+ extname = force_extname || File.extname(file_path)
29
32
  basename = File.basename(file_path, extname)
30
33
  basename += "_#{Digest::MD5.hexdigest(uri.query)}" if uri.query.present?
31
34
  origin = "#{uri.scheme.try { |s| s + '_' }}#{uri.host}#{uri.port.try { |p| '_' + p.to_s }}"
32
- extname = force_extname if force_extname && extname.empty?
33
35
  file_path = File.join(origin, File.dirname(file_path), basename + extname)
34
36
  if uri.fragment.present?
35
37
  file_path += "##{uri.fragment}"
@@ -1,3 +1,3 @@
1
1
  module Mkwebook
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.2"
3
3
  end
@@ -6,6 +6,13 @@ browser: # browser settings
6
6
 
7
7
  concurrency: 16 # number of concurrent threads, default is no concurrency
8
8
 
9
+ authentication: # authentication settings
10
+ url: https://example.com/login # URL of a page to open before injecting the cookies and local storage
11
+ cookies: "auth_cookie_id=demo" # cookie string to be injected
12
+ local-storage: # local storage to be injected
13
+ username: demo # key and value
14
+ auth_token: demo # key and value
15
+
9
16
  index-page: # index page settings
10
17
  url: https://clojure.org/guides/repl/introduction # URL of index page
11
18
  title: Clojure Guides # title for the book, use page's title if not set
@@ -23,6 +30,7 @@ index-page: # index page settings
23
30
  - selector: "script[src]"
24
31
  attr: src
25
32
 
33
+ max-recursion: 2 # max depth of recursive downloading
26
34
 
27
35
  pages: # settings for content pages
28
36
  - url-pattern: '.*' # URL pattern for content page, only pages' URL matching this pattern will be processed
@@ -34,6 +42,7 @@ pages: # settings for content pages
34
42
  style.innerHTML = '.clj-content-container { margin-left: 0; }';
35
43
  document.body.appendChild(style);
36
44
  selector: html # CSS selector for the content to be saved
45
+ page-link-selector: "a:not([href='../guides'])" # links to be downloaded recursively which are extracted from page content
37
46
  assets: # assets to be downloaded
38
47
  - selector: img # CSS selector for assets
39
48
  attr: src # attribute name for the asset URL
@@ -41,4 +50,36 @@ pages: # settings for content pages
41
50
  attr: href
42
51
  - selector: "script[src]"
43
52
  attr: src
53
+
54
+ extra-pages: # settings for extra pages
55
+ - https://www.example.com/extra-page-1
56
+
57
+ post-process: | # Shell script to be executed after the book is downloaded
58
+ find . -name '*.html' -exec sed -i 's/https:\/\/clojure.org\/guides\/repl\/introduction/..\/index.html/g' {} \;
44
59
 
60
+ docset: # config for generating the docset
61
+ name: "Clojure Guides" # docset name
62
+ keyword: "clojure" # docset keyword
63
+ icon: "clojure.png" # docset icon
64
+ index: "/index.html" # docset index page
65
+ pages: # docset pages config
66
+ - url-pattern: 'index.html' # URL pattern for docset page, will match against local downloaded pages
67
+ extractor: | # JavaScript code to extract the content for docset page
68
+ (function() {
69
+ var links = [...document.querySelectorAll('a.data-url')];
70
+ return links.map(link => {
71
+ var name = link.innerText;
72
+ var tag = link;
73
+ while (tag.tagName != 'BODY') {
74
+ tag = tag.parentElement;
75
+ if (tag.previousElementSibling && tag.previousElementSibling.tagName == 'H1') {
76
+ name = tag.previousElementSibling.innerText + ' - ' + name;
77
+ }
78
+ }
79
+ return { # Should return an array of objects with name, path, type attributes
80
+ path: link.href, # path of the page, should be relative to DEMO.docset/Contents/Resources/Documents (starts with /)
81
+ name: name, # name of extracted element
82
+ type: 'Guide' # type of extracted element
83
+ };
84
+ });
85
+ })();
data/mkwebook.gemspec CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency 'activesupport', '>= 6.1.5'
25
25
  spec.add_dependency 'concurrent-ruby'
26
26
  spec.add_dependency 'ferrum', '>= 0.13'
27
+ spec.add_dependency 'sqlite3', '~> 1.5.4'
27
28
  spec.add_dependency 'thor', '>= 1.2.1'
28
29
 
29
30
  spec.add_development_dependency 'pry'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mkwebook
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Liu Xiang
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-09 00:00:00.000000000 Z
11
+ date: 2022-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.13'
55
+ - !ruby/object:Gem::Dependency
56
+ name: sqlite3
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 1.5.4
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 1.5.4
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: thor
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -132,8 +146,8 @@ files:
132
146
  - lib/mkwebook/cli.rb
133
147
  - lib/mkwebook/commands.rb
134
148
  - lib/mkwebook/concerns.rb
135
- - lib/mkwebook/concerns/global_data_definition.rb
136
149
  - lib/mkwebook/config.rb
150
+ - lib/mkwebook/entry_types.txt
137
151
  - lib/mkwebook/ext.rb
138
152
  - lib/mkwebook/ext/string.rb
139
153
  - lib/mkwebook/version.rb
@@ -1,244 +0,0 @@
1
- require 'active_support/concern'
2
-
3
- module Mkwebook
4
- module Concerns
5
- module GlobalDataDefinition
6
- extend ActiveSupport::Concern
7
-
8
- included do
9
-
10
- # Example:
11
- #
12
- # create_table :post, id: false, primary_key: :id do |t|
13
- # t.column :id, :bigint, precison: 19, comment: 'ID'
14
- # t.column :name, :string, comment: '名称'
15
- # t.column :gmt_created, :datetime, comment: '创建时间'
16
- # t.column :gmt_modified, :datetime, comment: '最后修改时间'
17
- # end
18
- #
19
- # Creates a new table with the name +table_name+. +table_name+ may either
20
- # be a String or a Symbol.
21
- #
22
- # There are two ways to work with #create_table. You can use the block
23
- # form or the regular form, like this:
24
- #
25
- # === Block form
26
- #
27
- # # create_table() passes a TableDefinition object to the block.
28
- # # This form will not only create the table, but also columns for the
29
- # # table.
30
- #
31
- # create_table(:suppliers) do |t|
32
- # t.column :name, :string, limit: 60
33
- # # Other fields here
34
- # end
35
- #
36
- # === Block form, with shorthand
37
- #
38
- # # You can also use the column types as method calls, rather than calling the column method.
39
- # create_table(:suppliers) do |t|
40
- # t.string :name, limit: 60
41
- # # Other fields here
42
- # end
43
- #
44
- # === Regular form
45
- #
46
- # # Creates a table called 'suppliers' with no columns.
47
- # create_table(:suppliers)
48
- # # Add a column to 'suppliers'.
49
- # add_column(:suppliers, :name, :string, {limit: 60})
50
- #
51
- # The +options+ hash can include the following keys:
52
- # [<tt>:id</tt>]
53
- # Whether to automatically add a primary key column. Defaults to true.
54
- # Join tables for {ActiveRecord::Base.has_and_belongs_to_many}[rdoc-ref:Associations::ClassMethods#has_and_belongs_to_many] should set it to false.
55
- #
56
- # A Symbol can be used to specify the type of the generated primary key column.
57
- # [<tt>:primary_key</tt>]
58
- # The name of the primary key, if one is to be added automatically.
59
- # Defaults to +id+. If <tt>:id</tt> is false, then this option is ignored.
60
- #
61
- # If an array is passed, a composite primary key will be created.
62
- #
63
- # Note that Active Record models will automatically detect their
64
- # primary key. This can be avoided by using
65
- # {self.primary_key=}[rdoc-ref:AttributeMethods::PrimaryKey::ClassMethods#primary_key=] on the model
66
- # to define the key explicitly.
67
- #
68
- # [<tt>:options</tt>]
69
- # Any extra options you want appended to the table definition.
70
- # [<tt>:temporary</tt>]
71
- # Make a temporary table.
72
- # [<tt>:force</tt>]
73
- # Set to true to drop the table before creating it.
74
- # Set to +:cascade+ to drop dependent objects as well.
75
- # Defaults to false.
76
- # [<tt>:if_not_exists</tt>]
77
- # Set to true to avoid raising an error when the table already exists.
78
- # Defaults to false.
79
- # [<tt>:as</tt>]
80
- # SQL to use to generate the table. When this option is used, the block is
81
- # ignored, as are the <tt>:id</tt> and <tt>:primary_key</tt> options.
82
- #
83
- # ====== Add a backend specific option to the generated SQL (MySQL)
84
- #
85
- # create_table(:suppliers, options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8mb4')
86
- #
87
- # generates:
88
- #
89
- # CREATE TABLE suppliers (
90
- # id bigint auto_increment PRIMARY KEY
91
- # ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
92
- #
93
- # ====== Rename the primary key column
94
- #
95
- # create_table(:objects, primary_key: 'guid') do |t|
96
- # t.column :name, :string, limit: 80
97
- # end
98
- #
99
- # generates:
100
- #
101
- # CREATE TABLE objects (
102
- # guid bigint auto_increment PRIMARY KEY,
103
- # name varchar(80)
104
- # )
105
- #
106
- # ====== Change the primary key column type
107
- #
108
- # create_table(:tags, id: :string) do |t|
109
- # t.column :label, :string
110
- # end
111
- #
112
- # generates:
113
- #
114
- # CREATE TABLE tags (
115
- # id varchar PRIMARY KEY,
116
- # label varchar
117
- # )
118
- #
119
- # ====== Create a composite primary key
120
- #
121
- # create_table(:orders, primary_key: [:product_id, :client_id]) do |t|
122
- # t.belongs_to :product
123
- # t.belongs_to :client
124
- # end
125
- #
126
- # generates:
127
- #
128
- # CREATE TABLE order (
129
- # product_id bigint NOT NULL,
130
- # client_id bigint NOT NULL
131
- # );
132
- #
133
- # ALTER TABLE ONLY "orders"
134
- # ADD CONSTRAINT orders_pkey PRIMARY KEY (product_id, client_id);
135
- #
136
- # ====== Do not add a primary key column
137
- #
138
- # create_table(:categories_suppliers, id: false) do |t|
139
- # t.column :category_id, :bigint
140
- # t.column :supplier_id, :bigint
141
- # end
142
- #
143
- # generates:
144
- #
145
- # CREATE TABLE categories_suppliers (
146
- # category_id bigint,
147
- # supplier_id bigint
148
- # )
149
- #
150
- # ====== Create a temporary table based on a query
151
- #
152
- # create_table(:long_query, temporary: true,
153
- # as: "SELECT * FROM orders INNER JOIN line_items ON order_id=orders.id")
154
- #
155
- # generates:
156
- #
157
- # CREATE TEMPORARY TABLE long_query AS
158
- # SELECT * FROM orders INNER JOIN line_items ON order_id=orders.id
159
- #
160
- # See also TableDefinition#column for details on how to create columns.
161
- def create_table(table_name, **options, &blk)
162
- ActiveRecord::Base.connection.create_table(table_name, **options, &blk)
163
- end
164
-
165
- # Creates a new join table with the name created using the lexical order of the first two
166
- # arguments. These arguments can be a String or a Symbol.
167
- #
168
- # # Creates a table called 'assemblies_parts' with no id.
169
- # create_join_table(:assemblies, :parts)
170
- #
171
- # You can pass an +options+ hash which can include the following keys:
172
- # [<tt>:table_name</tt>]
173
- # Sets the table name, overriding the default.
174
- # [<tt>:column_options</tt>]
175
- # Any extra options you want appended to the columns definition.
176
- # [<tt>:options</tt>]
177
- # Any extra options you want appended to the table definition.
178
- # [<tt>:temporary</tt>]
179
- # Make a temporary table.
180
- # [<tt>:force</tt>]
181
- # Set to true to drop the table before creating it.
182
- # Defaults to false.
183
- #
184
- # Note that #create_join_table does not create any indices by default; you can use
185
- # its block form to do so yourself:
186
- #
187
- # create_join_table :products, :categories do |t|
188
- # t.index :product_id
189
- # t.index :category_id
190
- # end
191
- #
192
- # ====== Add a backend specific option to the generated SQL (MySQL)
193
- #
194
- # create_join_table(:assemblies, :parts, options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8')
195
- #
196
- # generates:
197
- #
198
- # CREATE TABLE assemblies_parts (
199
- # assembly_id bigint NOT NULL,
200
- # part_id bigint NOT NULL,
201
- # ) ENGINE=InnoDB DEFAULT CHARSET=utf8
202
- #
203
- def create_join_table(table_1, table_2, column_options: {}, **options)
204
- ActiveRecord::Base.connection.create_join_table(table_1, table_2, column_options, **options)
205
- end
206
-
207
- # Drops a table from the database.
208
- #
209
- # [<tt>:force</tt>]
210
- # Set to +:cascade+ to drop dependent objects as well.
211
- # Defaults to false.
212
- # [<tt>:if_exists</tt>]
213
- # Set to +true+ to only drop the table if it exists.
214
- # Defaults to false.
215
- #
216
- # Although this command ignores most +options+ and the block if one is given,
217
- # it can be helpful to provide these in a migration's +change+ method so it can be reverted.
218
- # In that case, +options+ and the block will be used by #create_table.
219
- def drop_table(table_name, **options)
220
- ActiveRecord::Base.connection.drop_table(table_name, **options)
221
- end
222
-
223
- # Drops the join table specified by the given arguments.
224
- # See #create_join_table for details.
225
- #
226
- # Although this command ignores the block if one is given, it can be helpful
227
- # to provide one in a migration's +change+ method so it can be reverted.
228
- # In that case, the block will be used by #create_join_table.
229
- def drop_join_table(table_1, table_2, **options)
230
- ActiveRecord::Base.connection.drop_join_table(table_1, table_2, **options)
231
- end
232
-
233
- # Renames a table.
234
- #
235
- # rename_table('octopuses', 'octopi')
236
- #
237
- def rename_table(table_name, new_name)
238
- ActiveRecord::Base.connection.rename_table(table_name, new_name)
239
- end
240
-
241
- end
242
- end
243
- end
244
- end