sitemap_gen 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b8cdcdb67f361a65fc5e59cd7adff5d5d07b1507
4
- data.tar.gz: 025423411014ac08aaa105615394a9d6cbc090fc
3
+ metadata.gz: a183db3b9c498c875fbe993c7d5dcdf916fce480
4
+ data.tar.gz: 0e3cf9657840e7d3fc54b883e0ae66c9911c3cb3
5
5
  SHA512:
6
- metadata.gz: e04e2e4ae4a36f3e8ed0a64cb706ff3bb1303224cee0bb375d77fab8ed68e638d34cf15cdfef2867a4839d1fee7a79f43354d97d7e0f3f7ac6f33191db4ce697
7
- data.tar.gz: 4d252c9a5965a7587b1abb32666a55ede6d33e3ac31cdfcc593e4a9c088ef5e74f707980d9118de52fec63972da7ef2f89a00d524c6d8f8f06696cdc7d6da48b
6
+ metadata.gz: 0cc31773493aad0da6fc22979c5a008c224da287c7eba6086d246dc0eab15c28c9f9832b85d78bca0262623115a21d35886cca1a7ec86fc83f22b1aeab1086a0
7
+ data.tar.gz: c200be43c228bbd3fdd5939424ca3946366cd3a7c65aa11a63565e324b53cd19582451d19e850a64e696a1921903cd85d9139cd764e9f245a364229bf1c584c6
data/lib/sitemap_gen.rb CHANGED
@@ -1,64 +1,78 @@
1
1
  require 'nokogiri'
2
2
  require 'csv'
3
+ require 'benchmark'
4
+ require 'sitemap_gen/csv'
3
5
 
4
6
  module SitemapGen
5
- IGNORE_DIRS_REGEX = /img|cgi-bin|images|css|js/i
6
-
7
- class << self
8
- def run(dir_path, base_url, save_path = nil)
9
- generate_csv(csv_data(dir_path, base_url), save_path)
10
- end
7
+ def self.run(dir_path, base_url, save_path = nil)
8
+ SitemapGen::Csv.new(dir_path, base_url, save_path).generate
9
+ end
10
+ #class << self
11
+ #def run(dir_path, base_url, save_path = nil)
12
+ #p Benchmark.measure { generate_csv(csv_data(dir_path, base_url), save_path) }
13
+ #end
11
14
 
12
- def generate_csv(data, save_path)
13
- level_header = data.each_with_object([]) { |item, o| o << item[:levels] }
14
- .inject([]) { |max, row| max.size < row.keys.size ? row.keys : max }
15
- save_path ||= Dir.pwd
16
- CSV.open("#{save_path}/sitemap.csv", 'wb') do |csv|
17
- csv << ['id'].concat(level_header).push('url')
18
- data.each_with_index do |row, i|
19
- gap = level_header.length - row[:levels].values.length
20
- csv << [i + 1].concat(row[:levels].values).concat(Array.new(gap) { '' }).push(row[:url])
21
- end
22
- end
23
- end
15
+ #def generate_csv(data, save_path)
16
+ #lheaders = level_headers(data)
17
+ #save_path ||= Dir.pwd
18
+ #CSV.open("#{save_path}/sitemap.csv", 'wb') do |csv|
19
+ #csv << ['id'].concat(lheaders).push('url')
20
+ #data.each_with_index do |row, i|
21
+ #gap = lheaders.length - row[:levels].values.length
22
+ #csv << [i + 1].concat(row[:levels].values).concat(Array.new(gap) { '' }).push(row[:url])
23
+ #end
24
+ #end
25
+ #end
24
26
 
25
- def csv_data(dir_path, base_url)
26
- # If there is a foward slash at the end of dir path then remove it
27
- #dir_path = dir_path[0..-2] if dir_path[-1] =~ /\//
27
+ #def level_headers(data)
28
+ #data.each_with_object([]) { |item, o| o << item[:levels] }
29
+ #.inject([]) { |max, row| max.size < row.keys.size ? row.keys : max }
30
+ #end
28
31
 
29
- # Exit if there is no html files
30
- html_files = Dir.glob("#{dir_path}/**/*.html")
31
- exit if html_files.empty?
32
+ #def csv_data(dir_path, base_url)
33
+ ## Raise error if there is no html files
34
+ #html_files = Dir.glob("#{dir_path}/**/index.html")
35
+ #raise 'There is no html files in your directory' if html_files.empty?
32
36
 
33
- data = []
34
- html_files.each_with_index do |file_path, i|
35
- next if file_path =~ IGNORE_DIRS_REGEX
36
- server_pathname = file_path.sub(dir_path, '')
37
- base_path = File.dirname(server_pathname)
38
- last_slash = base_path == '/' ? '' : '/'
39
- data.push({ url: base_url + base_path + last_slash}
40
- .merge({ levels: dir_levels(dir_path, server_pathname) }))
41
- end
42
- data
43
- end
37
+ #data = []
38
+ #html_files.each_with_index do |file_path, i|
39
+ #next if file_path =~ IGNORE_DIRS_REGEX
40
+ #page_path = file_path.sub(dir_path, '')
41
+ #base_path = File.dirname(page_path)
42
+ #last_slash = base_path == '/' ? '' : '/'
43
+ #short_page_path = page_path.split('/')[0..-2].join('/')
44
+ #data.push({ url: base_url + base_path + last_slash, page_path: short_page_path == '' ? '/' : short_page_path }
45
+ #.merge({ levels: dir_levels(dir_path, page_path) }))
46
+ #end
47
+ #organized_data(dir_path, data)
48
+ #end
44
49
 
45
- def page_title(file_path)
46
- html_doc = Nokogiri::HTML(File.read(file_path))
47
- html_doc.css('head title').first.content
48
- end
50
+ #def page_title(file_path)
51
+ #html_doc = Nokogiri::HTML(File.read(file_path))
52
+ #html_doc.css('head title').first.content
53
+ #end
49
54
 
50
- def dir_levels(dir_path, server_pathname)
51
- levels = {}
52
- dirs = server_pathname.split('/')
55
+ #def dir_levels(dir_path, page_path)
56
+ #levels = {}
57
+ #order = 0
58
+ #page_path.split('/')[1..-2].each_with_index do |dir, i|
59
+ #order = i + 1
60
+ #levels.merge!({"level_#{order}": ''})
61
+ #end
62
+ #html_file = Dir.glob("#{dir_path}#{page_path}").first
63
+ #levels.merge!({"level_#{order + 1}": page_title(html_file)})
64
+ #levels
65
+ #end
53
66
 
54
- # Drop first and last element of dirs array, because they are a empty string and a filename
55
- dirs[1..-2].each_with_index do |dir, i|
56
- current_dir_index = dirs.index(dir)
57
- current_path = dirs[0..current_dir_index].join('/')
58
- html_file = Dir.glob("#{dir_path}#{current_path}/index.html").first
59
- levels.merge!({"level_#{i + 1}": page_title(html_file)})
60
- end
61
- levels
62
- end
63
- end
67
+ #def organized_data(dir_path, data)
68
+ #top_level_dirs = Dir.glob("#{dir_path}/**/index.html").map { |path| File.dirname(path).sub(dir_path, '').split('/')[0..-1].join('/') }.uniq
69
+ #organized_data = []
70
+ #organized_data.concat data.select { |d| d[:page_path] == '/' }
71
+ #top_level_dirs.each do |dir|
72
+ ## We group data by page path and then order by alphabet
73
+ #organized_data.concat(data.select { |d| d[:page_path] =~ /\A#{Regexp.quote(dir)}\z/ }.sort_by { |d| d[:page_path] })
74
+ #end
75
+ #organized_data
76
+ #end
77
+ #end
64
78
  end
@@ -0,0 +1,74 @@
1
+ module SitemapGen
2
+ class Csv
3
+ IGNORE_DIRS_REGEX = /img|cgi-bin|images|css|js/i
4
+
5
+ def initialize(dir_path, base_url, save_path)
6
+ @dir_path = dir_path
7
+ @base_url = base_url
8
+ @save_path = save_path || Dir.pwd
9
+ @max_level = 1
10
+ @html_files = Dir.glob("#{dir_path}/**/index.html").sort_by { |f| File.dirname(f) }
11
+ raise 'There is no index.html files in your directory' if @html_files.empty?
12
+ @sitemaps = create_sitemaps
13
+ end
14
+
15
+ def generate
16
+ CSV.open("#{@save_path}/sitemap.csv", 'wb') do |csv|
17
+ csv << csv_header
18
+ @sitemaps.each_with_index { |item, i| csv << csv_row(item, i) }
19
+ end
20
+ end
21
+
22
+ private
23
+
24
+ def create_sitemaps
25
+ sitemaps = []
26
+ @html_files.each do |f|
27
+ next if f =~ IGNORE_DIRS_REGEX
28
+ sitemaps.push({ url: @base_url + server_path(f), levels: dir_levels(f) })
29
+ end
30
+ sitemaps
31
+ end
32
+
33
+ def csv_header
34
+ header = ['Id']
35
+ @max_level.to_i.times.each { |l| header.push("Level #{l + 1}") }
36
+ header.push('Url')
37
+ end
38
+
39
+ def csv_row(item, order_num)
40
+ gap = @max_level - item[:levels].values.size
41
+ [order_num + 1].concat(item[:levels].values).concat(Array.new(gap) { '' }).push(item[:url])
42
+ end
43
+
44
+ def html_page_title(file_path)
45
+ html_doc = Nokogiri::HTML(File.read(file_path))
46
+ html_doc.css('head title').first.content
47
+ end
48
+
49
+ def dir_levels(file_path)
50
+ p file_path
51
+ levels = {}
52
+ order = 0
53
+ dirs = server_path(file_path).split('/')
54
+ if dirs.empty?
55
+ levels.merge!({"level_#{order += 1}": html_page_title(file_path)})
56
+ else
57
+ dirs[1..-1].each_with_index do |dir, i|
58
+ levels.merge!({"level_#{order += 1}": ''})
59
+ end
60
+ levels.merge!({"level_#{order += 1}": html_page_title(file_path)})
61
+ end
62
+ set_max_level(order)
63
+ levels
64
+ end
65
+
66
+ def server_path(file_path)
67
+ File.dirname(file_path.sub(@dir_path, ''))
68
+ end
69
+
70
+ def set_max_level(num)
71
+ @max_level = num > @max_level ? num : @max_level
72
+ end
73
+ end
74
+ end
@@ -1,3 +1,3 @@
1
1
  module SitemapGen
2
- VERSION = '0.1.2'
2
+ VERSION = '0.1.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemap_gen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minh Phan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-05 00:00:00.000000000 Z
11
+ date: 2017-07-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -76,6 +76,7 @@ extra_rdoc_files: []
76
76
  files:
77
77
  - bin/sitemap-gen
78
78
  - lib/sitemap_gen.rb
79
+ - lib/sitemap_gen/csv.rb
79
80
  - lib/sitemap_gen/version.rb
80
81
  homepage: https://github.com/1PACVietnam/sitemap-gen
81
82
  licenses: