indexmap 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+ require "uri"
5
+
6
module Indexmap
  module Storage
    # Storage backend that keeps every file as an attachment on a record of
    # +model+, with one record per filename.
    #
    # The model is sent +find_by+, +find_or_initialize_by+ and +all+ (plus
    # +where+ / +connection+ when it responds to +where+). Records are sent
    # +persisted?+, +save!+, the filename column reader and the attachment
    # reader. The attachment is sent +attach+, +attached?+, +download+ and
    # +purge+.
    class ActiveStorage
      DEFAULT_CONTENT_TYPE = "application/xml"

      # @param model [#find_by, #find_or_initialize_by, #all] record class
      # @param public_url [String, nil] base URL that stored filenames are joined onto
      # @param filename_column [Symbol] column holding the stored filename
      # @param attachment [Symbol] name of the attachment reader on a record
      # @param content_type [String] content type used when #write is given none
      def initialize(model:, public_url:, filename_column: :filename, attachment: :file, content_type: DEFAULT_CONTENT_TYPE)
        @model = model
        @public_url_base = public_url
        @filename_column = filename_column
        @attachment = attachment
        @default_content_type = content_type
      end

      # Attaches +body+ to the record for +filename+, saving the record first
      # when it is not persisted yet.
      #
      # @param filename [String]
      # @param body [#to_s]
      # @param content_type [String, nil] falls back to the configured default
      # @return [Indexmap::Storage::File] description of what was written
      def write(filename, body, content_type: nil)
        attachment_content_type = content_type || default_content_type
        content = body.to_s
        record = find_or_initialize(filename)
        record.save! unless record.persisted?
        record.public_send(attachment).attach(
          io: StringIO.new(content),
          filename: filename,
          content_type: attachment_content_type
        )

        File.new(filename: filename, body: content, content_type: attachment_content_type)
      end

      # @return [String, nil] the downloaded attachment, or nil when nothing is
      #   attached for +filename+
      def read(filename)
        record = find_record(filename)
        return unless attached?(record)

        record.public_send(attachment).download
      end

      # @return [Boolean] whether a record for +filename+ exists and has an attachment
      def exist?(filename)
        attached?(find_record(filename))
      end

      # @param prefix [String, nil] keep only filenames starting with this
      # @param suffix [String, nil] keep only filenames ending with this
      # @return [Array<String>] sorted filenames that have an attachment
      def list(prefix: nil, suffix: nil)
        relation = relation_for(prefix: prefix, suffix: suffix)

        relation.filter_map do |record|
          filename = record.public_send(filename_column).to_s
          # Re-check in Ruby: the relation may be unfiltered, and LIKE treats
          # "%" and "_" inside prefix/suffix as wildcards.
          next if prefix && !filename.start_with?(prefix)
          next if suffix && !filename.end_with?(suffix)
          next unless attached?(record)

          filename
        end.sort
      end

      # Purges the attachment for +filename+, if there is one. The record
      # itself is left in place.
      def delete(filename)
        record = find_record(filename)
        record&.public_send(attachment)&.purge
      end

      # @return [String] +filename+ joined onto the configured base URL, or the
      #   bare filename when no base URL is configured
      def public_url(filename)
        base = public_url_base
        return filename.to_s if base.empty?

        URI.join("#{base}/", filename.to_s).to_s
      end

      def inspect
        "#<#{self.class.name} model=#{model}>"
      end

      private

      attr_reader :model, :filename_column, :attachment, :default_content_type

      def find_or_initialize(filename)
        model.find_or_initialize_by(filename_column => filename)
      end

      def find_record(filename)
        model.find_by(filename_column => filename)
      end

      # Narrows the candidate records with a LIKE query when the model supports
      # +where+ and a filter was given; otherwise returns every record.
      def relation_for(prefix:, suffix:)
        return model.all if !model.respond_to?(:where) || (!prefix && !suffix)

        column = model.connection.quote_column_name(filename_column)
        pattern =
          if prefix && suffix
            "#{prefix}%#{suffix}"
          elsif prefix
            "#{prefix}%"
          else
            "%#{suffix}"
          end

        model.where("#{column} LIKE ?", pattern)
      end

      # Always answers with a real boolean so that #exist? never leaks nil
      # when the record or its attachment is missing.
      def attached?(record)
        record&.public_send(attachment)&.attached? ? true : false
      end

      # Base URL without surrounding whitespace or one trailing slash; an
      # empty string when no base URL was configured.
      def public_url_base
        @public_url_base.to_s.strip.delete_suffix("/")
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Indexmap
  module Storage
    # Keyword-initialized value object describing one stored file: its
    # storage-relative filename, its body and its content type.
    File = Struct.new(:filename, :body, :content_type, keyword_init: true)

    # Reopened to add behaviour to the struct defined above.
    class File
      # @return [String] last path component of +filename+
      def basename
        # Leading :: reaches Ruby's core File, which this struct shadows here.
        ::File.basename(filename)
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
require "pathname"
require "uri"
4
+
5
module Indexmap
  module Storage
    # Storage backend that reads and writes files beneath a root directory.
    # Filenames are always interpreted relative to that root; absolute names
    # and names that climb out of it are rejected.
    class Filesystem
      DEFAULT_CONTENT_TYPE = "application/xml"

      # @param path [String, Pathname] root directory of the storage
      # @param public_url [String, nil] base URL that stored filenames are joined onto
      def initialize(path:, public_url: nil)
        @path = Pathname(path)
        @public_url_base = public_url
      end

      # Writes +body+ to +filename+ under the root, creating parent
      # directories as needed.
      #
      # @param filename [#to_s] storage-relative filename
      # @param body [#to_s]
      # @param content_type [String, nil] only recorded on the returned File
      # @return [Indexmap::Storage::File] description of what was written
      # @raise [ArgumentError] when +filename+ is absolute or escapes the root
      def write(filename, body, content_type: DEFAULT_CONTENT_TYPE)
        relative = normalize_filename(filename)
        content = body.to_s
        target = path.join(relative)
        target.dirname.mkpath
        target.write(content)

        File.new(
          filename: relative,
          body: content,
          content_type: content_type || DEFAULT_CONTENT_TYPE
        )
      end

      # @return [String] file contents read as UTF-8
      # @raise [ArgumentError] when +filename+ is absolute or escapes the root
      def read(filename)
        path_for(filename).read(encoding: "UTF-8")
      end

      # @return [Boolean] whether +filename+ is a regular file under the root
      def exist?(filename)
        path_for(filename).file?
      end

      # @param prefix [String, nil] keep only filenames starting with this
      # @param suffix [String, nil] keep only filenames ending with this
      # @return [Array<String>] sorted root-relative filenames of regular files
      def list(prefix: nil, suffix: nil)
        path.glob("**/*").select(&:file?).filter_map do |file|
          filename = file.relative_path_from(path).to_s
          next if prefix && !filename.start_with?(prefix)
          next if suffix && !filename.end_with?(suffix)

          filename
        end.sort
      end

      # Removes +filename+ when it is a regular file; otherwise does nothing.
      def delete(filename)
        target = path_for(filename)
        target.delete if target.file?
      end

      # @return [String] normalized filename joined onto the configured base
      #   URL, or the normalized filename alone when no base URL is configured
      # @raise [ArgumentError] when +filename+ is absolute or escapes the root
      def public_url(filename)
        relative = normalize_filename(filename)
        # Strip once and reuse the stripped value: a base with surrounding
        # whitespace (for example a trailing newline) is not a valid URI.
        base = public_url_base.to_s.strip
        return relative if base.empty?

        URI.join("#{base.delete_suffix("/")}/", relative).to_s
      end

      def inspect
        "#<#{self.class.name} path=#{path}>"
      end

      private

      attr_reader :path, :public_url_base

      def path_for(filename)
        path.join(normalize_filename(filename))
      end

      # Collapses "." and ".." segments and refuses anything that would
      # resolve outside the storage root.
      def normalize_filename(filename)
        normalized = Pathname(filename.to_s).cleanpath
        if normalized.absolute? || normalized.to_s.start_with?("../") || normalized.to_s == ".."
          raise ArgumentError, "Storage filename must be relative: #{filename.inspect}"
        end

        normalized.to_s
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+
5
module Indexmap
  module Storage
    # Storage backend that keeps files in a Hash keyed by filename. Nothing
    # is persisted outside the instance.
    class Memory
      DEFAULT_CONTENT_TYPE = "application/xml"

      # @param files [Array<#filename, #body, #content_type>, #filename, nil]
      #   initial files; a single file object is accepted as well as a list
      # @param public_url [String, nil] base URL that stored filenames are joined onto
      def initialize(files = [], public_url: nil)
        @files = {}
        @public_url_base = public_url

        # Kernel#Array converts through #to_a, which turns a lone Struct-based
        # file into its member values, so wrap a single file object by hand.
        initial_files = files.respond_to?(:filename) ? [files] : Array(files)
        initial_files.each do |file|
          write(file.filename, file.body, content_type: file.content_type)
        end
      end

      # Stores +body+ under +filename+, replacing any previous entry.
      #
      # @param filename [#to_s]
      # @param body [#to_s]
      # @param content_type [String, nil] nil falls back to DEFAULT_CONTENT_TYPE
      # @return [Indexmap::Storage::File] the stored entry
      def write(filename, body, content_type: DEFAULT_CONTENT_TYPE)
        key = normalize_filename(filename)
        files[key] = File.new(
          filename: key,
          body: body.to_s,
          content_type: content_type || DEFAULT_CONTENT_TYPE
        )
      end

      # @return [String] stored body
      # @raise [KeyError] when nothing is stored under +filename+
      def read(filename)
        files.fetch(normalize_filename(filename)).body
      end

      # @return [Boolean] whether something is stored under +filename+
      def exist?(filename)
        files.key?(normalize_filename(filename))
      end

      # @param prefix [String, nil] keep only filenames starting with this
      # @param suffix [String, nil] keep only filenames ending with this
      # @return [Array<String>] sorted matching filenames
      def list(prefix: nil, suffix: nil)
        matching = files.keys.select do |filename|
          (prefix.nil? || filename.start_with?(prefix)) &&
            (suffix.nil? || filename.end_with?(suffix))
        end

        matching.sort
      end

      # @return [Indexmap::Storage::File, nil] the removed entry, if any
      def delete(filename)
        files.delete(normalize_filename(filename))
      end

      # @return [String] filename joined onto the configured base URL, or the
      #   filename alone when no base URL is configured
      def public_url(filename)
        key = normalize_filename(filename)
        # Strip once and reuse the stripped value: a base with surrounding
        # whitespace (for example a trailing newline) is not a valid URI.
        base = public_url_base.to_s.strip
        return key if base.empty?

        URI.join("#{base.delete_suffix("/")}/", key).to_s
      end

      private

      attr_reader :files, :public_url_base

      def normalize_filename(filename)
        filename.to_s
      end
    end
  end
end
@@ -10,15 +10,15 @@ module Indexmap
10
10
 
11
11
  def create
12
12
  written_files = Indexmap.create(configuration: configuration)
13
- index_now_key_path = write_index_now_key
13
+ index_now_key_filename = write_index_now_key if configuration.index_now.write_key_file?
14
14
  configuration.run_after_create_callbacks
15
15
 
16
- {files: written_files.map(&:to_s), written_files: written_files, index_now_key_path: index_now_key_path}
16
+ {files: written_files.map(&:to_s), written_files: written_files, index_now_key_filename: index_now_key_filename}
17
17
  end
18
18
 
19
19
  def format
20
- sitemap_files.each do |file_path|
21
- content = File.read(file_path)
20
+ sitemap_files.each do |filename|
21
+ content = storage.read(filename)
22
22
  document = Nokogiri::XML(
23
23
  content,
24
24
  nil,
@@ -27,7 +27,7 @@ module Indexmap
27
27
  )
28
28
  save_options = Nokogiri::XML::Node::SaveOptions::FORMAT | Nokogiri::XML::Node::SaveOptions::AS_XML
29
29
 
30
- File.write(file_path, document.to_xml(indent: 2, save_with: save_options))
30
+ storage.write(filename, document.to_xml(indent: 2, save_with: save_options), content_type: "application/xml")
31
31
  end
32
32
 
33
33
  sitemap_files
@@ -45,8 +45,8 @@ module Indexmap
45
45
  pinger.write_key_file
46
46
  end
47
47
 
48
- def public_path
49
- default_output.public_path
48
+ def storage
49
+ configuration.storage
50
50
  end
51
51
 
52
52
  private
@@ -58,7 +58,7 @@ module Indexmap
58
58
  end
59
59
 
60
60
  def sitemap_files
61
- Dir.glob(public_path.join("sitemap*.xml")).sort
61
+ storage.list(prefix: "sitemap", suffix: ".xml")
62
62
  end
63
63
  end
64
64
  end
@@ -7,20 +7,17 @@ require "uri"
7
7
 
8
8
  module Indexmap
9
9
  class Validator
10
- def initialize(configuration: Indexmap.configuration, path: nil)
10
+ def initialize(configuration: Indexmap.configuration, filename: nil)
11
11
  @configuration = configuration
12
- @path = path
12
+ @filename = filename
13
13
  end
14
14
 
15
15
  def validate!
16
- sitemap_path = path || Indexmap::Path.existing_public_path(
17
- public_path: configuration.public_path,
18
- index_filename: configuration.index_filename
19
- )
20
- raise ValidationError, "Missing sitemap file: #{sitemap_path}" unless File.exist?(sitemap_path)
21
-
22
- validate_sitemap_file!(sitemap_path)
23
- entries = Parser.new(path: sitemap_path).entries
16
+ sitemap_filename = filename || configuration.index_filename
17
+ raise ValidationError, "Missing sitemap file: #{sitemap_filename}" unless storage.exist?(sitemap_filename)
18
+
19
+ validate_sitemap_file!(sitemap_filename)
20
+ entries = Parser.new(source: sitemap_filename, storage: storage, index_filename: configuration.index_filename).entries
24
21
  validate_presence!(entries)
25
22
  validate_duplicates!(entries)
26
23
  validate_parameterized_urls!(entries)
@@ -33,39 +30,43 @@ module Indexmap
33
30
 
34
31
  private
35
32
 
36
- attr_reader :configuration, :path
33
+ attr_reader :configuration, :filename
34
+
35
+ def storage
36
+ configuration.storage
37
+ end
37
38
 
38
- def validate_sitemap_file!(sitemap_path)
39
- document = read_xml_document(sitemap_path)
39
+ def validate_sitemap_file!(sitemap_filename)
40
+ document = read_xml_document(sitemap_filename)
40
41
  root_name = document.root&.name
41
42
 
42
43
  case root_name
43
44
  when "urlset"
44
- validate_urlset_document!(document, sitemap_path)
45
+ validate_urlset_document!(document, sitemap_filename)
45
46
  when "sitemapindex"
46
- validate_sitemap_index_document!(document, sitemap_path)
47
+ validate_sitemap_index_document!(document, sitemap_filename)
47
48
  else
48
- raise ValidationError, "Invalid sitemap root element in #{sitemap_path}: #{root_name || "none"}"
49
+ raise ValidationError, "Invalid sitemap root element in #{sitemap_filename}: #{root_name || "none"}"
49
50
  end
50
51
  end
51
52
 
52
- def read_xml_document(file_path)
53
- document = Nokogiri::XML(File.read(file_path, encoding: "UTF-8")) { |config| config.strict }
53
+ def read_xml_document(filename)
54
+ document = Nokogiri::XML(storage.read(filename)) { |config| config.strict }
54
55
  document.remove_namespaces!
55
56
  document
56
57
  rescue Nokogiri::XML::SyntaxError => error
57
- raise ValidationError, "Invalid sitemap XML in #{file_path}: #{error.message.lines.first.strip}"
58
+ raise ValidationError, "Invalid sitemap XML in #{filename}: #{error.message.lines.first.strip}"
58
59
  end
59
60
 
60
- def validate_urlset_document!(document, sitemap_path)
61
+ def validate_urlset_document!(document, sitemap_filename)
61
62
  return if document.xpath("/urlset/url/loc").any?
62
63
 
63
- raise ValidationError, "Sitemap has no URLs: #{sitemap_path}"
64
+ raise ValidationError, "Sitemap has no URLs: #{sitemap_filename}"
64
65
  end
65
66
 
66
- def validate_sitemap_index_document!(document, sitemap_path)
67
+ def validate_sitemap_index_document!(document, sitemap_filename)
67
68
  child_locations = document.xpath("/sitemapindex/sitemap/loc").map { |node| node.text.to_s.strip }.reject(&:empty?)
68
- raise ValidationError, "Sitemap index has no child sitemap URLs: #{sitemap_path}" if child_locations.empty?
69
+ raise ValidationError, "Sitemap index has no child sitemap URLs: #{sitemap_filename}" if child_locations.empty?
69
70
 
70
71
  duplicate_children = child_locations.group_by(&:itself).select { |_loc, values| values.size > 1 }.keys
71
72
  unless duplicate_children.empty?
@@ -73,19 +74,30 @@ module Indexmap
73
74
  end
74
75
 
75
76
  child_locations.each do |location|
76
- child_path = local_child_path(sitemap_path, location)
77
- raise ValidationError, "Missing child sitemap file: #{child_path}" unless File.exist?(child_path)
77
+ child_filename = local_child_filename(location, parent_filename: sitemap_filename)
78
+ raise ValidationError, "Missing child sitemap file: #{child_filename}" unless storage.exist?(child_filename)
78
79
 
79
- validate_sitemap_file!(child_path)
80
+ validate_sitemap_file!(child_filename)
80
81
  end
81
82
  end
82
83
 
83
- def local_child_path(sitemap_path, location)
84
+ def local_child_filename(location, parent_filename:)
84
85
  uri = URI.parse(location)
85
- filename = (uri.absolute? || location.start_with?("/")) ? File.basename(uri.path) : location
86
- File.expand_path(filename, File.dirname(sitemap_path))
86
+ if uri.absolute? || location.start_with?("/")
87
+ normalize_local_filename(uri.path)
88
+ else
89
+ parent_directory = File.dirname(parent_filename)
90
+ normalize_local_filename((parent_directory == ".") ? location : File.join(parent_directory, location))
91
+ end
87
92
  rescue URI::InvalidURIError
88
- File.expand_path(location, File.dirname(sitemap_path))
93
+ normalize_local_filename(location)
94
+ end
95
+
96
+ def normalize_local_filename(filename)
97
+ normalized = Pathname(filename.to_s).cleanpath.to_s.sub(%r{\A/+}, "")
98
+ return if normalized.empty? || normalized == ".." || normalized.start_with?("../")
99
+
100
+ normalized
89
101
  end
90
102
 
91
103
  def validate_presence!(entries)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Indexmap
4
- VERSION = "0.6.0"
4
+ VERSION = "0.7.0"
5
5
  end
@@ -4,18 +4,15 @@ module Indexmap
4
4
  class Writer
5
5
  VALID_FORMATS = %i[index single_file].freeze
6
6
 
7
- def initialize(public_path:, base_url:, sections: nil, entries: nil, index_filename: "sitemap.xml", format: :index)
7
+ def initialize(base_url:, sections: nil, entries: nil, index_filename: "sitemap.xml", format: :index)
8
8
  @entries = normalize_entries(entries)
9
9
  @format = normalize_format(format)
10
10
  @sections = normalize_sections(sections)
11
- @public_path = Pathname(public_path)
12
11
  @base_url = base_url
13
12
  @index_filename = index_filename
14
13
  end
15
14
 
16
15
  def write
17
- FileUtils.mkdir_p(public_path)
18
-
19
16
  return [write_file(index_filename, urlset_xml(entries))] if single_file?
20
17
 
21
18
  paths = sections.map do |section|
@@ -25,8 +22,6 @@ module Indexmap
25
22
  paths + [write_file(index_filename, index_xml(sections))]
26
23
  end
27
24
 
28
- attr_accessor :public_path
29
-
30
25
  private
31
26
 
32
27
  attr_reader :base_url, :entries, :format, :index_filename, :sections
@@ -58,9 +53,7 @@ module Indexmap
58
53
  end
59
54
 
60
55
  def write_file(filename, body)
61
- path = public_path.join(filename)
62
- path.write(body)
63
- path
56
+ Storage::File.new(filename: filename, body: body, content_type: "application/xml")
64
57
  end
65
58
 
66
59
  def urlset_xml(entries)
data/lib/indexmap.rb CHANGED
@@ -8,11 +8,14 @@ require "time"
8
8
  require_relative "indexmap/version"
9
9
  require_relative "indexmap/google_configuration"
10
10
  require_relative "indexmap/index_now_configuration"
11
+ require_relative "indexmap/storage/file"
12
+ require_relative "indexmap/storage/memory"
13
+ require_relative "indexmap/storage/filesystem"
14
+ require_relative "indexmap/storage/active_storage"
11
15
  require_relative "indexmap/configuration"
12
16
  require_relative "indexmap/creator"
13
17
  require_relative "indexmap/entry"
14
18
  require_relative "indexmap/output"
15
- require_relative "indexmap/path"
16
19
  require_relative "indexmap/parser"
17
20
  require_relative "indexmap/pinger/base"
18
21
  require_relative "indexmap/pinger/google"
@@ -5,8 +5,8 @@ namespace :indexmap do
5
5
  runner = Indexmap::TaskRunner.new
6
6
  create_result = runner.create
7
7
 
8
- puts "Created, formatted, and validated #{file_count(create_result[:files])} in #{public_directory(runner)}."
9
- puts "IndexNow key file: #{create_result[:index_now_key_path]}" if create_result[:index_now_key_path]
8
+ puts "Created, formatted, and validated #{file_count(create_result[:files])} in #{storage_description(runner)}."
9
+ puts "IndexNow key file: #{create_result[:index_now_key_filename]}" if create_result[:index_now_key_filename]
10
10
  end
11
11
 
12
12
  desc "Format sitemap files for better readability"
@@ -14,7 +14,7 @@ namespace :indexmap do
14
14
  runner = Indexmap::TaskRunner.new
15
15
  formatted_files = runner.format
16
16
 
17
- puts "Formatted #{file_count(formatted_files)} in #{public_directory(runner)}."
17
+ puts "Formatted #{file_count(formatted_files)} in #{storage_description(runner)}."
18
18
  end
19
19
 
20
20
  desc "Validate sitemap shape and URL hygiene"
@@ -87,8 +87,8 @@ namespace :indexmap do
87
87
  "#{count} sitemap #{(count == 1) ? "file" : "files"}"
88
88
  end
89
89
 
90
- def public_directory(runner)
91
- runner.public_path
90
+ def storage_description(runner)
91
+ runner.storage
92
92
  end
93
93
 
94
94
  def format_google_ping_failure(failure)