sitemap_gen 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/sitemap-gen +41 -0
- data/lib/sitemap_gen/version.rb +3 -0
- data/lib/sitemap_gen.rb +62 -0
- metadata +105 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 9806fa1dd6d1946761e6e877cda97887632fcb6a
|
|
4
|
+
data.tar.gz: 54441447f5e85f4980b365058d9b94658db7c774
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 14025eb1aae87595b4828d0964f801e59bf485c8bbb1526d2f1b41be8b2abad98a816c5f170e53fd5d526a00af8429abd1783a87e4350c4e4f20adca20290455
|
|
7
|
+
data.tar.gz: 55eaa213439a2378052724f2ba5d5767d51ff14379284f10fb129436569c520d6ab565629a1d59eb961ad051b8c0588cda68831edf00a2e17c927b8114d35f39
|
data/bin/sitemap-gen
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'sitemap_gen'
|
|
4
|
+
require 'optparse'
|
|
5
|
+
|
|
6
|
+
# Command-line entry point for SitemapGen.
#
# Collects the input directory, the site base URL and an optional output
# directory from the command line, then delegates to SitemapGen.run.
options = {}

parser = OptionParser.new do |opts|
  opts.banner = <<-EOS

A sitemap generator use directory structure input
Usage: sitemap-gen -i PATH -u URL [-o PATH]

Options:
  EOS

  # NOTE: the placeholders are written without square brackets on purpose.
  # In OptionParser, "[PATH]" marks the argument as optional, so a bare "-i"
  # would store nil and the generator would end up globbing from "/".
  opts.on('-i', '--input PATH', 'Input directory that need to generate csv') do |path|
    options[:input] = path
  end

  opts.on('-u', '--base_url URL', 'Base url of website') do |url|
    options[:base_url] = url
  end

  opts.on('-o', '--output PATH', 'Path to save output csv') do |path|
    options[:output] = path
  end

  opts.on('-h', '--help', 'Display information') do
    puts opts
    exit
  end
end

# Report malformed command lines with the usage text instead of a backtrace.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  abort "#{e.message}\n#{parser}"
end

# Both the input directory and the base URL are mandatory; previously the
# script exited silently (status 0) when either one was missing.
abort parser.to_s unless options[:input] && options[:base_url]

# options[:output] is nil when -o was not given; SitemapGen.run then falls
# back to its own default (the current working directory).
SitemapGen.run(options[:input], options[:base_url], options[:output])
|
data/lib/sitemap_gen.rb
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'csv'
|
|
3
|
+
|
|
4
|
+
# Builds a CSV "sitemap" from the directory layout of a static HTML site.
#
# Every *.html file found below the input directory becomes one row holding a
# sequential id, the URL of the directory that contains the page, and one
# level_N column per directory level.
module SitemapGen
  # Names of asset directories whose pages are left out of the sitemap.
  # Kept unchanged for backward compatibility with external users.
  IGNORE_DIRS_REGEX = /img|cgi-bin|images|css|js/i

  # Same alternatives as IGNORE_DIRS_REGEX, anchored so that they only match
  # a complete directory name ("js" matches, "json" or "jsmith" do not).
  IGNORED_DIR_NAME = /\A(?:#{IGNORE_DIRS_REGEX.source})\z/i

  class << self
    # Scans +dir_path+ and writes the resulting sitemap.csv.
    #
    # @param dir_path [String] root directory of the site on disk
    # @param base_url [String] URL prefix prepended to every page path
    # @param save_path [String, nil] directory for sitemap.csv; defaults to
    #   the current working directory
    def run(dir_path, base_url, save_path = nil)
      generate_csv(csv_data(dir_path, base_url), save_path)
    end

    # Writes +data+ to "<save_path>/sitemap.csv".
    #
    # The header is taken from the row with the most columns, so it covers
    # the deepest directory nesting present in +data+.
    #
    # @param data [Array<Hash>] rows as produced by csv_data
    # @param save_path [String, nil] target directory (Dir.pwd when nil)
    def generate_csv(data, save_path)
      header = data.map(&:keys).max_by(&:size) || []
      save_path ||= Dir.pwd
      CSV.open(File.join(save_path, 'sitemap.csv'), 'wb') do |csv|
        csv << header
        data.each { |row| csv << row.values }
      end
    end

    # Collects one row per HTML page found below +dir_path+.
    #
    # Terminates the process (Kernel#exit) when no HTML file exists at all;
    # this mirrors the behaviour the command-line tool relies on.
    #
    # @param dir_path [String] root directory of the site on disk
    # @param base_url [String] URL prefix prepended to every page path
    # @return [Array<Hash>] rows with :id, :url and :level_N keys
    # @raise [ArgumentError] when +dir_path+ is nil or empty
    def csv_data(dir_path, base_url)
      raise ArgumentError, 'dir_path must not be empty' if dir_path.nil? || dir_path.to_s.empty?

      # Trailing slashes would otherwise leak into the relative paths/URLs.
      root = dir_path.to_s.sub(%r{/+\z}, '')
      site = base_url.to_s.sub(%r{/+\z}, '')

      # Sorted so that ids are stable regardless of filesystem order.
      html_files = Dir.glob("#{root}/**/*.html").sort
      exit if html_files.empty?

      html_files.each_with_object([]) do |file_path, rows|
        # Path of the page relative to the site root, with a leading slash.
        server_pathname = file_path[root.length..-1]
        next if ignored?(server_pathname)

        base_path = File.dirname(server_pathname)
        url = base_path == '/' ? "#{site}/" : "#{site}#{base_path}/"
        # rows.size keeps ids gap-free even when files were skipped.
        rows << { id: rows.size + 1, url: url }.merge(dir_levels(server_pathname))
      end
    end

    # Reads the <title> of an HTML file.
    #
    # @param file_path [String] path of the HTML file
    # @return [String, nil] the title text, or nil when the page has none
    def page_title(file_path)
      html_doc = Nokogiri::HTML(File.read(file_path))
      title = html_doc.css('head title').first
      title && title.content
    end

    # Maps the directories of a server path to level_N keys.
    #
    #   dir_levels('/a/b/index.html') # => { level_1: 'a', level_2: 'b' }
    #
    # @param server_pathname [String] site-relative path with leading slash
    # @return [Hash{Symbol => String}]
    def dir_levels(server_pathname)
      dirs = server_pathname.split('/')

      # Drop the first element (empty string before the leading slash) and
      # the last one (the file name); the slice is nil for an empty path.
      (dirs[1..-2] || []).each_with_index.each_with_object({}) do |(dir, i), levels|
        levels[:"level_#{i + 1}"] = dir
      end
    end

    private

    # True when any directory of the site-relative path is an ignored asset
    # directory. Only the relative directories are inspected, so neither the
    # location of the site on disk nor the file name can trigger a skip.
    def ignored?(server_pathname)
      File.dirname(server_pathname).split('/').any? { |dir| dir =~ IGNORED_DIR_NAME }
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: sitemap_gen
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Minh Phan
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2017-07-05 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: nokogiri
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.8'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.8'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: bundler
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '1.13'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '1.13'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rake
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '10.0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '10.0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: rspec
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '3.0'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '3.0'
|
|
69
|
+
description:
|
|
70
|
+
email:
|
|
71
|
+
- wofi.minh@1pac.vn
|
|
72
|
+
executables:
|
|
73
|
+
- sitemap-gen
|
|
74
|
+
extensions: []
|
|
75
|
+
extra_rdoc_files: []
|
|
76
|
+
files:
|
|
77
|
+
- bin/sitemap-gen
|
|
78
|
+
- lib/sitemap_gen.rb
|
|
79
|
+
- lib/sitemap_gen/version.rb
|
|
80
|
+
homepage: https://github.com/1PACVietnam/sitemap-gen
|
|
81
|
+
licenses:
|
|
82
|
+
- MIT
|
|
83
|
+
metadata:
|
|
84
|
+
allowed_push_host: https://rubygems.org
|
|
85
|
+
post_install_message:
|
|
86
|
+
rdoc_options: []
|
|
87
|
+
require_paths:
|
|
88
|
+
- lib
|
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
90
|
+
requirements:
|
|
91
|
+
- - ">="
|
|
92
|
+
- !ruby/object:Gem::Version
|
|
93
|
+
version: '0'
|
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
95
|
+
requirements:
|
|
96
|
+
- - ">="
|
|
97
|
+
- !ruby/object:Gem::Version
|
|
98
|
+
version: '0'
|
|
99
|
+
requirements: []
|
|
100
|
+
rubyforge_project:
|
|
101
|
+
rubygems_version: 2.6.10
|
|
102
|
+
signing_key:
|
|
103
|
+
specification_version: 4
|
|
104
|
+
summary: A generator sitemap based on directory structure
|
|
105
|
+
test_files: []
|