proxycrawl 0.3.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 464bcfcfd4be75be12ec870b5cb2ac5d9f38ed01e1fa2ba44ee1746cf9031795
4
- data.tar.gz: 579e140f00efd51ec451b1e33d926d079b3cd5f4353e3a1c001bf37ce04e4e51
3
+ metadata.gz: c225955f21200efc0790c401385727e3b9dd9bc20f785ce768f44f687f55105b
4
+ data.tar.gz: 0a38e44493065a9779366d4ab5372771bf92b020da513059c0cf20b69e2d696a
5
5
  SHA512:
6
- metadata.gz: 00d314fe67826d76c4ec0470cc5419495d047ed0aa5a408d9d625c45919bd5f1f3c207e0fca30feb66b1b8360e9e7c80f3e9b99967ea6661d5bafb8b981930e1
7
- data.tar.gz: 67e1685970d48281b357bcda337fa7bbab40794e9488c5afd3af2810b2cb66a7494e3c8135ca6ff0c085b1d3aa329297b527788d614d2ae578f3b2adb9e5894e
6
+ metadata.gz: 2d81900546063ff3ddbbe660f528b8cbc04336916bce468f37c8791370b7dd99c36d32915dd471ff886bc6fb8fb498ddccc4adfb40c95009849e25a1f4938805
7
+ data.tar.gz: cd167890621d7d8bcbdf1c2502e545c9ea40c95abf437927e443dc1e3694d8393fae4d1c521e7b34227c4a962e172cb085697169d43f9ee1ff99369649e6c36f
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2021 ProxyCrawl
3
+ Copyright (c) 2022 ProxyCrawl
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -361,4 +361,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
361
361
 
362
362
  ---
363
363
 
364
- Copyright 2021 ProxyCrawl
364
+ Copyright 2022 ProxyCrawl
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class API
9
- attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
9
+ attr_reader :token, :body, :timeout, :status_code, :original_status, :pc_status, :url, :storage_url
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_URL = 'URL is required'
@@ -15,14 +15,22 @@ module ProxyCrawl
15
15
  raise INVALID_TOKEN if options[:token].nil?
16
16
 
17
17
  @token = options[:token]
18
+ @timeout = options[:timeout] || 120
18
19
  end
19
20
 
20
21
  def get(url, options = {})
21
22
  raise INVALID_URL if url.empty?
22
23
 
23
24
  uri = prepare_uri(url, options)
25
+ req = Net::HTTP::Get.new(uri)
24
26
 
25
- response = Net::HTTP.get_response(uri)
27
+ req_options = {
28
+ read_timeout: timeout,
29
+ use_ssl: uri.scheme == 'https',
30
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
31
+ }
32
+
33
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
26
34
 
27
35
  prepare_response(response, options[:format])
28
36
 
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class LeadsAPI
9
- attr_reader :token, :body, :status_code, :success, :remaining_requests
9
+ attr_reader :token, :timeout, :body, :status_code, :success, :remaining_requests
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_DOMAIN = 'Domain is required'
@@ -15,6 +15,7 @@ module ProxyCrawl
15
15
  raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
16
16
 
17
17
  @token = options[:token]
18
+ @timeout = options[:timeout] || 120
18
19
  end
19
20
 
20
21
  def get(domain)
@@ -23,7 +24,15 @@ module ProxyCrawl
23
24
  uri = URI('https://api.proxycrawl.com/leads')
24
25
  uri.query = URI.encode_www_form({ token: token, domain: domain })
25
26
 
26
- response = Net::HTTP.get_response(uri)
27
+ req = Net::HTTP::Get.new(uri)
28
+
29
+ req_options = {
30
+ read_timeout: timeout,
31
+ use_ssl: uri.scheme == 'https',
32
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
33
+ }
34
+
35
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
27
36
  @status_code = response.code.to_i
28
37
  @body = response.body
29
38
 
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class StorageAPI
9
- attr_reader :token, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
9
+ attr_reader :token, :timeout, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_RID = 'RID is required'
@@ -18,6 +18,7 @@ module ProxyCrawl
18
18
  raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
19
19
 
20
20
  @token = options[:token]
21
+ @timeout = options[:timeout] || 120
21
22
  end
22
23
 
23
24
  def get(url_or_rid, format = 'html')
@@ -25,7 +26,16 @@ module ProxyCrawl
25
26
 
26
27
  uri = URI(BASE_URL)
27
28
  uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
28
- response = Net::HTTP.get_response(uri)
29
+
30
+ req = Net::HTTP::Get.new(uri)
31
+
32
+ req_options = {
33
+ read_timeout: timeout,
34
+ use_ssl: uri.scheme == 'https',
35
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
36
+ }
37
+
38
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
29
39
 
30
40
  res = format == 'json' ? JSON.parse(response.body) : response
31
41
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ProxyCrawl
4
- VERSION = '0.3.1'
4
+ VERSION = '1.0.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxycrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - proxycrawl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-07-14 00:00:00.000000000 Z
11
+ date: 2022-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -109,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  requirements: []
112
- rubygems_version: 3.1.4
112
+ rubygems_version: 3.1.2
113
113
  signing_key:
114
114
  specification_version: 4
115
115
  summary: ProxyCrawl API client for web scraping and crawling