rubyretriever 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/retriever/fetch.rb +3 -2
- data/lib/retriever/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 752f8125a0153960c3575683d2db334dfd3ad88a
|
4
|
+
data.tar.gz: 96615e58b8d29297184e375747d313e61d6dbf02
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a11d2e56f8eba9730f2f36257105bac94767d510029bd1a909055293f2fefe6602d662d871df371cbe7d80eb35f6a9702b5fe6e02d2858665037c0a578524f8
|
7
|
+
data.tar.gz: 82b4a77637f603a809f62c9c1dbc196f6f71c0f6c886d4de6953ad9af9f134f83119320fea556e70d5815c5fae47925407163edb125eee9e4f85c5269a68a896
|
data/lib/retriever/fetch.rb
CHANGED
@@ -150,8 +150,6 @@ module Retriever
|
|
150
150
|
new_links_arr = process_link_stack
|
151
151
|
@temp_link_stack = []
|
152
152
|
next if new_links_arr.nil? || new_links_arr.empty?
|
153
|
-
# set operations to see are these in our previous visited pages arr
|
154
|
-
next if new_links_arr.empty?
|
155
153
|
@link_stack.concat(new_links_arr)
|
156
154
|
next unless @sitemap
|
157
155
|
@data.concat(new_links_arr)
|
@@ -243,6 +241,9 @@ module Retriever
|
|
243
241
|
end
|
244
242
|
# empty the stack. most clean way
|
245
243
|
@link_stack = []
|
244
|
+
# temp contains redirects + new visitable links
|
245
|
+
# we will re-initialize it as empty right after this function
|
246
|
+
# in the parent method 'async crawl and collect'
|
246
247
|
@temp_link_stack.flatten.uniq!
|
247
248
|
end
|
248
249
|
end
|
data/lib/retriever/version.rb
CHANGED