proxycrawl 0.3.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 464bcfcfd4be75be12ec870b5cb2ac5d9f38ed01e1fa2ba44ee1746cf9031795
4
- data.tar.gz: 579e140f00efd51ec451b1e33d926d079b3cd5f4353e3a1c001bf37ce04e4e51
3
+ metadata.gz: c225955f21200efc0790c401385727e3b9dd9bc20f785ce768f44f687f55105b
4
+ data.tar.gz: 0a38e44493065a9779366d4ab5372771bf92b020da513059c0cf20b69e2d696a
5
5
  SHA512:
6
- metadata.gz: 00d314fe67826d76c4ec0470cc5419495d047ed0aa5a408d9d625c45919bd5f1f3c207e0fca30feb66b1b8360e9e7c80f3e9b99967ea6661d5bafb8b981930e1
7
- data.tar.gz: 67e1685970d48281b357bcda337fa7bbab40794e9488c5afd3af2810b2cb66a7494e3c8135ca6ff0c085b1d3aa329297b527788d614d2ae578f3b2adb9e5894e
6
+ metadata.gz: 2d81900546063ff3ddbbe660f528b8cbc04336916bce468f37c8791370b7dd99c36d32915dd471ff886bc6fb8fb498ddccc4adfb40c95009849e25a1f4938805
7
+ data.tar.gz: cd167890621d7d8bcbdf1c2502e545c9ea40c95abf437927e443dc1e3694d8393fae4d1c521e7b34227c4a962e172cb085697169d43f9ee1ff99369649e6c36f
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2021 ProxyCrawl
3
+ Copyright (c) 2022 ProxyCrawl
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -361,4 +361,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
361
361
 
362
362
  ---
363
363
 
364
- Copyright 2021 ProxyCrawl
364
+ Copyright 2022 ProxyCrawl
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class API
9
- attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
9
+ attr_reader :token, :body, :timeout, :status_code, :original_status, :pc_status, :url, :storage_url
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_URL = 'URL is required'
@@ -15,14 +15,22 @@ module ProxyCrawl
15
15
  raise INVALID_TOKEN if options[:token].nil?
16
16
 
17
17
  @token = options[:token]
18
+ @timeout = options[:timeout] || 120
18
19
  end
19
20
 
20
21
  def get(url, options = {})
21
22
  raise INVALID_URL if url.empty?
22
23
 
23
24
  uri = prepare_uri(url, options)
25
+ req = Net::HTTP::Get.new(uri)
24
26
 
25
- response = Net::HTTP.get_response(uri)
27
+ req_options = {
28
+ read_timeout: timeout,
29
+ use_ssl: uri.scheme == 'https',
30
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
31
+ }
32
+
33
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
26
34
 
27
35
  prepare_response(response, options[:format])
28
36
 
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class LeadsAPI
9
- attr_reader :token, :body, :status_code, :success, :remaining_requests
9
+ attr_reader :token, :timeout, :body, :status_code, :success, :remaining_requests
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_DOMAIN = 'Domain is required'
@@ -15,6 +15,7 @@ module ProxyCrawl
15
15
  raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
16
16
 
17
17
  @token = options[:token]
18
+ @timeout = options[:timeout] || 120
18
19
  end
19
20
 
20
21
  def get(domain)
@@ -23,7 +24,15 @@ module ProxyCrawl
23
24
  uri = URI('https://api.proxycrawl.com/leads')
24
25
  uri.query = URI.encode_www_form({ token: token, domain: domain })
25
26
 
26
- response = Net::HTTP.get_response(uri)
27
+ req = Net::HTTP::Get.new(uri)
28
+
29
+ req_options = {
30
+ read_timeout: timeout,
31
+ use_ssl: uri.scheme == 'https',
32
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
33
+ }
34
+
35
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
27
36
  @status_code = response.code.to_i
28
37
  @body = response.body
29
38
 
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class StorageAPI
9
- attr_reader :token, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
9
+ attr_reader :token, :timeout, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_RID = 'RID is required'
@@ -18,6 +18,7 @@ module ProxyCrawl
18
18
  raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
19
19
 
20
20
  @token = options[:token]
21
+ @timeout = options[:timeout] || 120
21
22
  end
22
23
 
23
24
  def get(url_or_rid, format = 'html')
@@ -25,7 +26,16 @@ module ProxyCrawl
25
26
 
26
27
  uri = URI(BASE_URL)
27
28
  uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
28
- response = Net::HTTP.get_response(uri)
29
+
30
+ req = Net::HTTP::Get.new(uri)
31
+
32
+ req_options = {
33
+ read_timeout: timeout,
34
+ use_ssl: uri.scheme == 'https',
35
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
36
+ }
37
+
38
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
29
39
 
30
40
  res = format == 'json' ? JSON.parse(response.body) : response
31
41
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ProxyCrawl
4
- VERSION = '0.3.1'
4
+ VERSION = '1.0.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxycrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - proxycrawl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-07-14 00:00:00.000000000 Z
11
+ date: 2022-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -109,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  requirements: []
112
- rubygems_version: 3.1.4
112
+ rubygems_version: 3.1.2
113
113
  signing_key:
114
114
  specification_version: 4
115
115
  summary: ProxyCrawl API client for web scraping and crawling