sitemap_gen 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9806fa1dd6d1946761e6e877cda97887632fcb6a
4
+ data.tar.gz: 54441447f5e85f4980b365058d9b94658db7c774
5
+ SHA512:
6
+ metadata.gz: 14025eb1aae87595b4828d0964f801e59bf485c8bbb1526d2f1b41be8b2abad98a816c5f170e53fd5d526a00af8429abd1783a87e4350c4e4f20adca20290455
7
+ data.tar.gz: 55eaa213439a2378052724f2ba5d5767d51ff14379284f10fb129436569c520d6ab565629a1d59eb961ad051b8c0588cda68831edf00a2e17c927b8114d35f39
data/bin/sitemap-gen ADDED
@@ -0,0 +1,41 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the sitemap_gen gem.
#
# Parses the command-line options and hands them to SitemapGen.run:
#   -i / --input     directory passed to SitemapGen.run as the input path (required)
#   -u / --base_url  base URL passed to SitemapGen.run (required)
#   -o / --output    directory passed to SitemapGen.run as the save path (optional)

require 'sitemap_gen'
require 'optparse'

options = {}

parser = OptionParser.new do |opts|
  opts.banner = <<-EOS

A sitemap generator use directory structure input
Usage: sitemap-gen -i PATH -u URL [-o PATH]

Options:
  EOS

  # Option arguments are declared mandatory (no square brackets): with an
  # optional argument a bare `-i` would store nil and pass it on as the
  # directory to scan.
  opts.on('-i', '--input PATH', 'Input directory that need to generate csv') do |path|
    options[:input] = path
  end

  opts.on('-u', '--base_url URL', 'Base url of website') do |url|
    options[:base_url] = url
  end

  opts.on('-o', '--output PATH', 'Path to save output csv') do |path|
    options[:output] = path
  end

  opts.on('-h', '--help', 'Display information') do
    puts opts
    exit
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Report bad/missing option arguments with the usage text instead of a backtrace.
  abort "#{e.message}\n#{parser}"
end

# Both the input directory and the base URL are needed to build the sitemap;
# fail loudly (non-zero exit status) instead of silently doing nothing.
unless options[:input] && options[:base_url]
  abort "Missing required option: --input and --base_url must both be given\n#{parser}"
end

# options[:output] is nil when -o was not given, which is the same value
# SitemapGen.run uses as the default for its third parameter.
SitemapGen.run(options[:input], options[:base_url], options[:output])
@@ -0,0 +1,3 @@
1
module SitemapGen
  # Version string of the sitemap_gen gem. Frozen so that code holding a
  # reference to the constant cannot mutate it in place.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,62 @@
1
+ require 'nokogiri'
2
+ require 'csv'
3
+
4
# Builds a CSV sitemap from the directory layout of a tree of .html files.
module SitemapGen
  # Matches a path that contains one of the ignored directories as a WHOLE
  # path segment (case-insensitive), e.g. "/css/a.html" or "/x/IMG/b.html".
  # Anchoring on the surrounding slashes keeps names that merely contain one
  # of the words (such as "js-tips") from being ignored.
  IGNORE_DIRS_REGEX = %r{(?:\A|/)(?:img|cgi-bin|images|css|js)/}i

  class << self
    # Scans +dir_path+ for .html files and writes "sitemap.csv".
    #
    # dir_path  - directory to scan recursively.
    # base_url  - URL prefix put in front of every directory path.
    # save_path - directory to write sitemap.csv into; defaults to the
    #             current working directory when nil.
    #
    # Returns whatever generate_csv returns.
    def run(dir_path, base_url, save_path = nil)
      generate_csv(csv_data(dir_path, base_url), save_path)
    end

    # Writes +data+ (an array of row hashes) to "<save_path>/sitemap.csv".
    #
    # The header line is the key list of the row with the most keys; each
    # following line holds the values of one row in the row's own key order.
    # With no rows, only an empty header line is written.
    #
    # Returns +data+ (the value of the CSV.open block).
    def generate_csv(data, save_path)
      header = data.map(&:keys).max_by(&:size) || []
      save_path ||= Dir.pwd
      CSV.open(File.join(save_path, 'sitemap.csv'), 'wb') do |csv|
        csv << header
        data.each { |row| csv << row.values }
      end
    end

    # Collects one row hash per .html file found below +dir_path+.
    #
    # Each row has :id (1-based, consecutive), :url (base_url plus the
    # directory of the file, always ending in "/") and one :level_N key per
    # directory between +dir_path+ and the file (see dir_levels).
    #
    # Files located inside a directory matched by IGNORE_DIRS_REGEX are
    # skipped. Terminates the process via Kernel#exit when no .html file is
    # found at all.
    def csv_data(dir_path, base_url)
      # Trailing slashes would otherwise produce relative paths without a
      # leading "/" and URLs containing "//".
      root = dir_path.sub(%r{/+\z}, '')
      site = base_url.sub(%r{/+\z}, '')

      # Sorted so that ids are assigned in a stable order on every platform.
      html_files = Dir.glob("#{root}/**/*.html").sort
      # Exit if there are no html files.
      exit if html_files.empty?

      # Match the ignore pattern against the path relative to the scanned
      # root, so the name of a parent directory cannot hide the whole site.
      pages = html_files
              .map { |file_path| file_path.sub(root, '') }
              .reject { |server_pathname| server_pathname =~ IGNORE_DIRS_REGEX }

      # Ids are assigned after filtering so ignored files leave no gaps.
      # Page titles (see page_title) are not part of the rows.
      pages.each_with_index.map do |server_pathname, i|
        base_path = File.dirname(server_pathname)
        base_path = '' if base_path == '/'
        { id: i + 1, url: "#{site}#{base_path}/" }.merge(dir_levels(server_pathname))
      end
    end

    # Returns the content of the first <title> element inside <head> of the
    # HTML file at +file_path+, or nil when the document has no such element.
    def page_title(file_path)
      title = Nokogiri::HTML(File.read(file_path)).css('head title').first
      title && title.content
    end

    # Maps the directories of +server_pathname+ to numbered level keys.
    #
    #   dir_levels('/a/b/index.html') # => { level_1: 'a', level_2: 'b' }
    #
    # The first and last elements of the split path are dropped: for a path
    # starting with "/" they are an empty string and the file name.
    def dir_levels(server_pathname)
      dirs = server_pathname.split('/')
      # The slice is nil for an empty path, hence the fallback to [].
      (dirs[1..-2] || []).each_with_index.each_with_object({}) do |(dir, i), levels|
        levels[:"level_#{i + 1}"] = dir
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sitemap_gen
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Minh Phan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-07-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.8'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.8'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.13'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.13'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ description:
70
+ email:
71
+ - wofi.minh@1pac.vn
72
+ executables:
73
+ - sitemap-gen
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - bin/sitemap-gen
78
+ - lib/sitemap_gen.rb
79
+ - lib/sitemap_gen/version.rb
80
+ homepage: https://github.com/1PACVietnam/sitemap-gen
81
+ licenses:
82
+ - MIT
83
+ metadata:
84
+ allowed_push_host: https://rubygems.org
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 2.6.10
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: A generator sitemap based on directory structure
105
+ test_files: []