tiny_grabber 0.3.7 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/tiny_grabber.rb +16 -0
- data/lib/tiny_grabber/agent.rb +10 -4
- data/lib/tiny_grabber/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a6777d3e08c7225d080d075a226b8df733c46a09
|
|
4
|
+
data.tar.gz: cc2d7ea134106bd79e9c80a9ca8b7741ce7b0682
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9c1ccf0302e6080034ef1558e408159070aa05478d0dfda6bc76adacae2994791e3c8021d3fe5a2b9250289385abeaa3b9925c1e7e9f9502bd86797212261b27
|
|
7
|
+
data.tar.gz: 3785f3f616acc935d12690d9e197bebdf1a8db1c387489f2e7b0714a30b02c9fe41906fac208ee1ace25f97bef0b4ad323c9153ab7ff17fb7c53421d40507cad
|
data/README.md
CHANGED
data/lib/tiny_grabber.rb
CHANGED
|
@@ -21,6 +21,7 @@ class TinyGrabber
|
|
|
21
21
|
def self.initialize(config = {})
|
|
22
22
|
@agent = TinyGrabber::Agent.new
|
|
23
23
|
|
|
24
|
+
@agent.perfect_url = config[:perfect_url] if config[:perfect_url]
|
|
24
25
|
@agent.debug = config[:debug] if config[:debug]
|
|
25
26
|
@agent.read_timeout = config[:read_timeout] if config[:read_timeout]
|
|
26
27
|
@agent.user_agent = config[:user_agent] if config[:user_agent]
|
|
@@ -77,6 +78,21 @@ class TinyGrabber
|
|
|
77
78
|
@agent.debug = debug
|
|
78
79
|
end
|
|
79
80
|
|
|
81
|
+
# Read perfect_url agent attribute
|
|
82
|
+
#
|
|
83
|
+
def perfect_url
|
|
84
|
+
@agent.perfect_url
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# Set perfect_url agent attribute
|
|
88
|
+
#
|
|
89
|
+
# @param perfect_url Removes additional processing url
|
|
90
|
+
#
|
|
91
|
+
def perfect_url=(perfect_url)
|
|
92
|
+
@agent.perfect_url = perfect_url
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
|
|
80
96
|
# Read READ_TIMEOUT agent attribute
|
|
81
97
|
#
|
|
82
98
|
def read_timeout
|
data/lib/tiny_grabber/agent.rb
CHANGED
|
@@ -24,6 +24,8 @@ class TinyGrabber
|
|
|
24
24
|
attr_writer :follow_location
|
|
25
25
|
# Uri
|
|
26
26
|
attr_accessor :uri
|
|
27
|
+
# perfect url
|
|
28
|
+
attr_accessor :perfect_url
|
|
27
29
|
|
|
28
30
|
# Agent aliases given from http://www.useragentstring.com/pages/Chrome/
|
|
29
31
|
AGENT_ALIASES = [
|
|
@@ -60,6 +62,7 @@ class TinyGrabber
|
|
|
60
62
|
@basic_auth = {}
|
|
61
63
|
@headers = {}
|
|
62
64
|
@cookies = nil
|
|
65
|
+
@perfect_url = false
|
|
63
66
|
@follow_location = false
|
|
64
67
|
@read_timeout = 10
|
|
65
68
|
# Initialize variable for URI object
|
|
@@ -249,10 +252,13 @@ class TinyGrabber
|
|
|
249
252
|
# @param url Request link
|
|
250
253
|
#
|
|
251
254
|
def convert_to_uri(url)
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
255
|
+
unless @perfect_url
|
|
256
|
+
# Remove anchor
|
|
257
|
+
url = url.gsub(/#.*\Z/, '')
|
|
258
|
+
# It's magic work with escaped url
|
|
259
|
+
url = URI.escape(URI.unescape(url))
|
|
260
|
+
end
|
|
261
|
+
@uri = URI(url)
|
|
256
262
|
@debug.save "-> [uri] = #{@uri}" if @debug.active
|
|
257
263
|
end
|
|
258
264
|
|
data/lib/tiny_grabber/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tiny_grabber
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.7
|
|
4
|
+
version: 0.3.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Aleksandr Chernyshev
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-
|
|
11
|
+
date: 2016-12-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: socksify
|