spieker 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +5 -0
- data/lib/spieker/crawler.rb +1 -1
- data/lib/spieker/link_scraper.rb +11 -4
- data/lib/spieker/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1efbda5b0c8acddf71421f0a398fe81147bba156
|
4
|
+
data.tar.gz: b8d089ba60167155d2f502e6bbe6f06110d55a58
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 33cbdaa59a122b8572c84e6697dd8f2e3c066248a0a770bee2766ec8a0de261eda9f90591f4ae3fc2539a39e402bac78286bec9e5bc0df4f77ac224f4fc61e3a
|
7
|
+
data.tar.gz: 68074bacc9d188afcaf4ce2ac896b6684c3dcea36034b16760a53a564262999628bdf2fedc31a1e2ff3c90e415e7fb286a3f6ace046ecd4e392eff64a4584da1
|
data/CHANGELOG
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
0.0.8
|
2
|
+
* Handle query params like a boss
|
3
|
+
* Fix small bug with language not always being set
|
4
|
+
|
1
5
|
0.0.7
|
2
6
|
* Support setting the language as a parameter, default is 'en'
|
3
7
|
* Wait for tolqjs to submit the content before continuing
|
8
|
+
|
4
9
|
0.0.4
|
5
10
|
* Set useragent to Tolq Spieker
|
6
11
|
|
data/lib/spieker/crawler.rb
CHANGED
data/lib/spieker/link_scraper.rb
CHANGED
@@ -39,17 +39,24 @@ module Spieker
|
|
39
39
|
|
40
40
|
def drive_page_for_links
|
41
41
|
begin
|
42
|
-
|
43
|
-
|
42
|
+
query = if @url.query
|
43
|
+
"?#{@url.query}"
|
44
|
+
else
|
45
|
+
""
|
46
|
+
end
|
47
|
+
visit @url.path + query + "#!lang=#{@lang}"
|
48
|
+
# Capybara + selenium causes some links not to be found. There doesn't seem to be any method to that.
|
49
|
+
# The upside, though, is that doing this in JS is a lot faster as well
|
50
|
+
links = page.evaluate_script('document.getElementsByTagName(\'a\')').map { |el| el['href'] }
|
44
51
|
begin
|
45
52
|
# Our javascript adds a class if the content has been successfully submitted
|
46
53
|
page.find(:css, 'html.tolq-content-updated')
|
47
54
|
rescue Capybara::Ambiguous, Capybara::ElementNotFound => e
|
48
|
-
puts "Something went wrong with submitting the content #{e.
|
55
|
+
puts "Something went wrong with submitting the content: #{e.message}"
|
49
56
|
end
|
50
57
|
links
|
51
58
|
rescue => e
|
52
|
-
puts "Error parsing #{@url.to_s}, #{e.
|
59
|
+
puts "Error parsing #{@url.to_s}, #{e.message}"
|
53
60
|
[]
|
54
61
|
end
|
55
62
|
end
|
data/lib/spieker/version.rb
CHANGED