proxycrawl 0.3.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 464bcfcfd4be75be12ec870b5cb2ac5d9f38ed01e1fa2ba44ee1746cf9031795
4
- data.tar.gz: 579e140f00efd51ec451b1e33d926d079b3cd5f4353e3a1c001bf37ce04e4e51
3
+ metadata.gz: c2e422acaf7de6fef3bc43e6241265852a9d699dcf8a114af5aab12b84def901
4
+ data.tar.gz: a78a2265f2dde7c80918b36167499667d6c85871bb33c2c63e59d727cf9db47e
5
5
  SHA512:
6
- metadata.gz: 00d314fe67826d76c4ec0470cc5419495d047ed0aa5a408d9d625c45919bd5f1f3c207e0fca30feb66b1b8360e9e7c80f3e9b99967ea6661d5bafb8b981930e1
7
- data.tar.gz: 67e1685970d48281b357bcda337fa7bbab40794e9488c5afd3af2810b2cb66a7494e3c8135ca6ff0c085b1d3aa329297b527788d614d2ae578f3b2adb9e5894e
6
+ metadata.gz: 8b0d9ec78313bdccc5083432f53a1c384280ac2f7725d9309fef20cb488e0e5bc7f625ad03865a34735cd7724d562a45cf91c1abd86e11f7dc610c36aabb354f
7
+ data.tar.gz: 95a92e684149d66a072f47c98d41f4de23cd538d0a85685ec17ff979e3c773e6ca8061191d3455411f6a6dddefe272b6c291cc09e117da2e2d367fca60311fec
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2021 ProxyCrawl
3
+ Copyright (c) 2023 ProxyCrawl
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -1,3 +1,9 @@
1
+ # DEPRECATION NOTICE
2
+
3
+ > :warning: **IMPORTANT:** This gem is no longer maintained or supported. For the latest updates, please use our new gem at [crawlbase-ruby](https://github.com/crawlbase-source/crawlbase-ruby).
4
+
5
+ ---
6
+
1
7
  # ProxyCrawl
2
8
 
3
9
  Dependency free gem for scraping and crawling websites using the ProxyCrawl API.
@@ -361,4 +367,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
361
367
 
362
368
  ---
363
369
 
364
- Copyright 2021 ProxyCrawl
370
+ Copyright 2023 ProxyCrawl
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class API
9
- attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
9
+ attr_reader :token, :body, :timeout, :status_code, :original_status, :pc_status, :url, :storage_url
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_URL = 'URL is required'
@@ -15,14 +15,22 @@ module ProxyCrawl
15
15
  raise INVALID_TOKEN if options[:token].nil?
16
16
 
17
17
  @token = options[:token]
18
+ @timeout = options[:timeout] || 120
18
19
  end
19
20
 
20
21
  def get(url, options = {})
21
22
  raise INVALID_URL if url.empty?
22
23
 
23
24
  uri = prepare_uri(url, options)
25
+ req = Net::HTTP::Get.new(uri)
24
26
 
25
- response = Net::HTTP.get_response(uri)
27
+ req_options = {
28
+ read_timeout: timeout,
29
+ use_ssl: uri.scheme == 'https',
30
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
31
+ }
32
+
33
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
26
34
 
27
35
  prepare_response(response, options[:format])
28
36
 
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class LeadsAPI
9
- attr_reader :token, :body, :status_code, :success, :remaining_requests
9
+ attr_reader :token, :timeout, :body, :status_code, :success, :remaining_requests
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_DOMAIN = 'Domain is required'
@@ -15,6 +15,7 @@ module ProxyCrawl
15
15
  raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
16
16
 
17
17
  @token = options[:token]
18
+ @timeout = options[:timeout] || 120
18
19
  end
19
20
 
20
21
  def get(domain)
@@ -23,7 +24,15 @@ module ProxyCrawl
23
24
  uri = URI('https://api.proxycrawl.com/leads')
24
25
  uri.query = URI.encode_www_form({ token: token, domain: domain })
25
26
 
26
- response = Net::HTTP.get_response(uri)
27
+ req = Net::HTTP::Get.new(uri)
28
+
29
+ req_options = {
30
+ read_timeout: timeout,
31
+ use_ssl: uri.scheme == 'https',
32
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
33
+ }
34
+
35
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
27
36
  @status_code = response.code.to_i
28
37
  @body = response.body
29
38
 
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class StorageAPI
9
- attr_reader :token, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
9
+ attr_reader :token, :timeout, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_RID = 'RID is required'
@@ -18,6 +18,7 @@ module ProxyCrawl
18
18
  raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
19
19
 
20
20
  @token = options[:token]
21
+ @timeout = options[:timeout] || 120
21
22
  end
22
23
 
23
24
  def get(url_or_rid, format = 'html')
@@ -25,7 +26,16 @@ module ProxyCrawl
25
26
 
26
27
  uri = URI(BASE_URL)
27
28
  uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
28
- response = Net::HTTP.get_response(uri)
29
+
30
+ req = Net::HTTP::Get.new(uri)
31
+
32
+ req_options = {
33
+ read_timeout: timeout,
34
+ use_ssl: uri.scheme == 'https',
35
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
36
+ }
37
+
38
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
29
39
 
30
40
  res = format == 'json' ? JSON.parse(response.body) : response
31
41
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ProxyCrawl
4
- VERSION = '0.3.1'
4
+ VERSION = '1.0.1'
5
5
  end
data/proxycrawl.gemspec CHANGED
@@ -28,4 +28,7 @@ Gem::Specification.new do |spec|
28
28
  spec.add_development_dependency "webmock", "~> 3.4"
29
29
  spec.add_development_dependency "bundler", "~> 2.0"
30
30
  spec.add_development_dependency "rake", "~> 12.3.3"
31
+
32
+ # Deprecation warning
33
+ spec.post_install_message = "DEPRECATION WARNING: This package is no longer maintained due to rebranding. Please use the 'crawlbase' gem instead. More info: https://github.com/crawlbase-source/crawlbase-ruby"
31
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxycrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - proxycrawl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-07-14 00:00:00.000000000 Z
11
+ date: 2023-07-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -94,7 +94,8 @@ homepage: https://github.com/proxycrawl/proxycrawl-ruby
94
94
  licenses:
95
95
  - MIT
96
96
  metadata: {}
97
- post_install_message:
97
+ post_install_message: 'DEPRECATION WARNING: This package is no longer maintained due
98
+ to rebranding. Please use the ''crawlbase'' gem instead. More info: https://github.com/crawlbase-source/crawlbase-ruby'
98
99
  rdoc_options: []
99
100
  require_paths:
100
101
  - lib
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
110
  - !ruby/object:Gem::Version
110
111
  version: '0'
111
112
  requirements: []
112
- rubygems_version: 3.1.4
113
+ rubygems_version: 3.1.2
113
114
  signing_key:
114
115
  specification_version: 4
115
116
  summary: ProxyCrawl API client for web scraping and crawling