arachnid2 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e0c363d07cd32c0cbef2130ca5bb5a340ffbb2ea41c9ada00e0e292e5058753
4
- data.tar.gz: cdc27f16c8f9b0b84878b9336b6101fb8fa5961e890136b5aa891e3d7fe3cc73
3
+ metadata.gz: 35ba8ebca2f7ed2459bdf6db38053835a6977bb51b34627fd2f9642bb3b7abb3
4
+ data.tar.gz: f3ed7cd8b5cbabb1643bb2fee9121a8df97e9ba05c4d5452f53e32fa91bd8090
5
5
  SHA512:
6
- metadata.gz: a299e7db051a591bd131b1f633476e70d7dd45a7216c57e02bc51705b7e216be87c8d9b98f497fb084bbae1b06b0a07188884a486b79da951202e6e582bed5f9
7
- data.tar.gz: 4148aa53935e6c52c45ce5a349624366a524ac2783f2615346fed73c04b968b46596500ffe1dc7f12389820506748ce65c70ee5f38c0dacbcc3c8e94cf0701f1
6
+ metadata.gz: b99ae55035e1064fa2140d8d5c13e924a47ba52905611586090c0e35650e31d77067c39a00245d0be5364945f24a025821f18b45917b8584fd3aec72fd0190b3
7
+ data.tar.gz: ebf1e8ca73ed4a964b0b33de3ca396ba48b59df1777402ac9cc8d3ba1126cd5ea63b559c5d55a88f51595af8f58acefa592cefebaae8f590d697586b5ac27461
data/README.md CHANGED
@@ -67,6 +67,12 @@ opts = {
67
67
  max_urls: 50,
68
68
  language: "en-UK",
69
69
  user_agent: "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0",
70
+ proxy: {
71
+ ip: "1.2.3.4",
72
+ port: "1234",
73
+ username: "sam",
74
+ password: "coolcoolcool",
75
+ }
70
76
  }
71
77
  responses = []
72
78
 
@@ -100,6 +106,12 @@ This user agent is a string mapped to the HTTP header User-Agent. The
100
106
  default is
101
107
  `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1 Safari/605.1.15`
102
108
 
109
+ #### `proxy`
110
+
111
+ Provide the IP address and port of your proxy. If required, also provide
112
+ credentials for authenticating to that proxy. Proxy options are handled
113
+ by Typhoeus.
114
+
103
115
  ### Memory use in Docker
104
116
 
105
117
  In case you are operating the crawler within a container, Arachnid2
data/lib/arachnid2.rb CHANGED
@@ -65,8 +65,16 @@ class Arachnid2
65
65
  #
66
66
  # opts = {
67
67
  # :time_box => 30,
68
- # :language => "es-IO",
69
- # :user_agent => "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0"
68
+ # :headers => {
69
+ # 'Accept-Language' => "en-UK",
70
+ # 'User-Agent' => "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0",
71
+ # },
72
+ # :proxy => {
73
+ # :ip => "1.2.3.4",
74
+ # :port => "1234",
75
+ # :username => "sam",
76
+ # :password => "coolcoolcool",
77
+ # }
70
78
  # }
71
79
  # responses = []
72
80
  # spider.crawl(opts) { |response|
@@ -175,25 +183,27 @@ class Arachnid2
175
183
  followlocation: true,
176
184
  cookiefile: @cookie_file.path,
177
185
  cookiejar: @cookie_file.path,
178
- headers: {
179
- 'Accept-Language' => "#{language}",
180
- 'User-Agent' => "#{user_agent}"
181
- }
186
+ headers: @options[:headers]
182
187
  }
183
188
 
189
+ @request_options[:headers] ||= {}
190
+ @request_options[:headers]['Accept-Language'] ||= DEFAULT_LANGUAGE
191
+ @request_options[:headers]['User-Agent'] ||= DEFAULT_USER_AGENT
192
+
184
193
  @request_options
185
194
  end
186
195
 
187
- def language
188
- @options[:language] || DEFAULT_LANGUAGE
189
- end
196
+ def crawl_options
197
+ @crawl_options ||= nil
190
198
 
191
- def user_agent
192
- @options[:user_agent] || DEFAULT_USER_AGENT
193
- end
199
+ if !@crawl_options
200
+ @crawl_options = { :max_urls => max_urls, :time_limit => time_limit }
194
201
 
195
- def crawl_options
196
- { :max_urls => max_urls, :time_limit => time_limit }
202
+ @crawl_options[:proxy] = "#{@options[:proxy][:ip]}:#{@options[:proxy][:port]}" if @options.dig(:proxy, :ip)
203
+ @crawl_options[:proxyuserpwd] = "#{@options[:proxy][:username]}:#{@options[:proxy][:password]}" if @options.dig(:proxy, :username)
204
+ end
205
+
206
+ @crawl_options
197
207
  end
198
208
 
199
209
  def max_urls
@@ -1,3 +1,3 @@
1
1
  class Arachnid2
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arachnid2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Nissen
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-05-30 00:00:00.000000000 Z
11
+ date: 2018-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler