sitemaps_parser 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f6a5cf10a4446ce42e918767ee0279a81178951c
4
- data.tar.gz: 878301d4d7ffd4429a0d4763d9ecb4ccca719039
3
+ metadata.gz: 48e74e772cd689a18f735341f9cc1272494e26b8
4
+ data.tar.gz: 12737b00e5a24ae20ccb17c079b3fc3621e555c8
5
5
  SHA512:
6
- metadata.gz: 1a209890b6494fb17fed6c234656854a657dec8dc5d6012d752d6962339b8b5c06f8cd0facb15c96884f18bcabf4bb2a56aea53b84ad8326695ae43c4e57fe2b
7
- data.tar.gz: 108c423e4969c262bf00959fa6ae4d5855c0d35faf10d21c282d3a05413e2f4e9178767dfce86da7872231dc48ac7a81338c72a6b1f809d73ff2d7e3e11a7c9d
6
+ metadata.gz: 98d19dfb44b860c00401bcf2318e6a077101cd27a11fa7665d8d4b2d1a5bcbdfe11d0bd43e9b1da1e5e378dbe9eb92898b8132f2e6fe883e4df5fc0a0ae94138
7
+ data.tar.gz: e966d408e8edb14f66fce1ad6ce4f3053a53667a9e56ef26d3d3375311b3b4b83a794da859b174c67532dddaa4d01e6fb22cb0d5ec97bd0c856f2190bd0e4fb9
@@ -9,9 +9,10 @@ module Sitemaps
9
9
  # @param source [String] an XML string to parse.
10
10
  # @param max_entries [Integer, nil] the maximum number of entries to add to the sitemap.
11
11
  # @param filter [#call, nil] if provided, called per entry to filter the entry out of the sitemap.
12
+ # @param filter_indexes [Boolean, nil] if truthy, filter is called per submap as well as entries.
12
13
  # @return [Sitemap] the sitemap parsed from the XML string. If the XML string given is invalid,
13
14
  # a sitemap will still be returned, but the entries and sitemaps keys will be empty.
14
- def self.parse(source, max_entries: nil, filter: nil)
15
+ def self.parse(source, max_entries: nil, filter: nil, filter_indexes: nil)
15
16
  document = REXML::Document.new(source)
16
17
  entries = document.elements.to_a("/urlset/url").map do |root|
17
18
  loc = parse_loc(root) || next
@@ -29,7 +30,8 @@ module Sitemaps
29
30
  loc = parse_loc(root) || next
30
31
  mod = parse_lastmod(root)
31
32
 
32
- Sitemaps::Submap.new(loc, mod)
33
+ submap = Sitemaps::Submap.new(loc, mod)
34
+ (!filter || !filter_indexes || filter.call(submap)) ? submap : nil
33
35
  end.reject(&:nil?)
34
36
 
35
37
  Sitemaps::Sitemap.new(entries, sitemaps)
@@ -1,3 +1,3 @@
1
1
  module Sitemaps
2
- VERSION = "0.2.0".freeze
2
+ VERSION = "0.2.1".freeze
3
3
  end
data/lib/sitemaps.rb CHANGED
@@ -50,7 +50,7 @@ module Sitemaps
50
50
  # a good idea to include, as many sites have _very_ large sitemaps.
51
51
  # @return [Sitemap]
52
52
  #
53
- # @overload fetch(url, fetcher: nil, max_entries: nil)
53
+ # @overload fetch(url, fetcher: nil, filter_indexes: nil, max_entries: nil)
54
54
  # If a block is given, it's used as a filter for entries before they're added to the sitemap.
55
55
  #
56
56
  # @param url [String, URI] the url of the sitemap in question.
@@ -58,17 +58,19 @@ module Sitemaps
58
58
  # @param max_entries [Integer] the maximum number of entries to include in the sitemap. Once the
59
59
  # sitemap has this many entries, further fetches and parsing will not occur. This is always
60
60
  # a good idea to include, as many sites have _very_ large sitemaps.
61
+ # @param filter_indexes [Boolean] if true, Submap instances will be run through the filter block
62
+ # as well as Entry instances.
61
63
  # @return [Sitemap]
62
64
  # @yield [Entry] Filters the entry from the sitemap if the block returns falsey.
63
65
  # @yieldreturn [Boolean] whether or not to include the entry in the sitemap.
64
- def self.fetch(url, fetcher: nil, max_entries: nil, &block)
66
+ def self.fetch(url, fetcher: nil, max_entries: nil, filter_indexes: nil, &block)
65
67
  fetcher ||= @default_fetcher
66
68
  unless url.is_a? URI
67
69
  url = "http://#{url}" unless url =~ %r{^https?://}
68
70
  url = URI.parse(url)
69
71
  end
70
72
 
71
- _instance.fetch_recursive(url, fetcher, max_entries, &block)
73
+ _instance.fetch_recursive(url, fetcher, max_entries, filter_indexes, &block)
72
74
  end
73
75
 
74
76
  # Discover, fetch and parse sitemaps from the given host.
@@ -93,10 +95,12 @@ module Sitemaps
93
95
  # @param max_entries [Integer] the maximum number of entries to include in the sitemap. Once the
94
96
  # sitemap has this many entries, further fetches and parsing will not occur. This is always
95
97
  # a good idea to include, as many sites have _very_ large sitemaps.
98
+ # @param filter_indexes [Boolean] if true, Submap instances will be run through the filter block
99
+ # as well as Entry instances.
96
100
  # @return [Sitemap]
97
101
  # @yield [Entry] Filters the entry from the sitemap if the block returns falsey.
98
102
  # @yieldreturn [Boolean] whether or not to include the entry in the sitemap.
99
- def self.discover(url, fetcher: nil, max_entries: nil, &block)
103
+ def self.discover(url, fetcher: nil, max_entries: nil, filter_indexes: nil, &block)
100
104
  fetcher ||= @default_fetcher
101
105
  unless url.is_a? URI
102
106
  url = "http://#{url}" unless url =~ %r{^https?://}
@@ -104,7 +108,7 @@ module Sitemaps
104
108
  end
105
109
 
106
110
  roots = _instance.discover_roots(url, fetcher)
107
- _instance.fetch_recursive(roots, fetcher, max_entries, &block)
111
+ _instance.fetch_recursive(roots, fetcher, max_entries, filter_indexes, &block)
108
112
  end
109
113
 
110
114
  # @return [Instance]
@@ -120,7 +124,7 @@ module Sitemaps
120
124
  class Instance
121
125
  # recursively fetch sitemaps and sitemap indexes from the given urls.
122
126
  # @return [Sitemap]
123
- def fetch_recursive(urls, fetcher, max_entries, &block)
127
+ def fetch_recursive(urls, fetcher, max_entries, filter_indexes, &block)
124
128
  queue = urls.is_a?(Array) ? urls : [urls]
125
129
  maps = {}
126
130
 
@@ -134,7 +138,7 @@ module Sitemaps
134
138
 
135
139
  # fetch this item in the queue, and queue up any sub maps it found
136
140
  source = fetcher.call(url)
137
- sitemap = Sitemaps::Parser.parse(source, max_entries: max_entries, filter: block)
141
+ sitemap = Sitemaps::Parser.parse(source, max_entries: max_entries, filter: block, filter_indexes: filter_indexes)
138
142
 
139
143
  # save the results and queue up any submaps it found
140
144
  maps[url] = sitemap
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemaps_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Raphaelson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-24 00:00:00.000000000 Z
11
+ date: 2016-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler