proxycrawl 0.3.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 464bcfcfd4be75be12ec870b5cb2ac5d9f38ed01e1fa2ba44ee1746cf9031795
4
- data.tar.gz: 579e140f00efd51ec451b1e33d926d079b3cd5f4353e3a1c001bf37ce04e4e51
3
+ metadata.gz: c2e422acaf7de6fef3bc43e6241265852a9d699dcf8a114af5aab12b84def901
4
+ data.tar.gz: a78a2265f2dde7c80918b36167499667d6c85871bb33c2c63e59d727cf9db47e
5
5
  SHA512:
6
- metadata.gz: 00d314fe67826d76c4ec0470cc5419495d047ed0aa5a408d9d625c45919bd5f1f3c207e0fca30feb66b1b8360e9e7c80f3e9b99967ea6661d5bafb8b981930e1
7
- data.tar.gz: 67e1685970d48281b357bcda337fa7bbab40794e9488c5afd3af2810b2cb66a7494e3c8135ca6ff0c085b1d3aa329297b527788d614d2ae578f3b2adb9e5894e
6
+ metadata.gz: 8b0d9ec78313bdccc5083432f53a1c384280ac2f7725d9309fef20cb488e0e5bc7f625ad03865a34735cd7724d562a45cf91c1abd86e11f7dc610c36aabb354f
7
+ data.tar.gz: 95a92e684149d66a072f47c98d41f4de23cd538d0a85685ec17ff979e3c773e6ca8061191d3455411f6a6dddefe272b6c291cc09e117da2e2d367fca60311fec
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2021 ProxyCrawl
3
+ Copyright (c) 2023 ProxyCrawl
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -1,3 +1,9 @@
1
+ # DEPRECATION NOTICE
2
+
3
+ > :warning: **IMPORTANT:** This gem is no longer maintained or supported. For the latest updates, please use our new gem at [crawlbase-ruby](https://github.com/crawlbase-source/crawlbase-ruby).
4
+
5
+ ---
6
+
1
7
  # ProxyCrawl
2
8
 
3
9
  Dependency free gem for scraping and crawling websites using the ProxyCrawl API.
@@ -361,4 +367,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
361
367
 
362
368
  ---
363
369
 
364
- Copyright 2021 ProxyCrawl
370
+ Copyright 2023 ProxyCrawl
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class API
9
- attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
9
+ attr_reader :token, :body, :timeout, :status_code, :original_status, :pc_status, :url, :storage_url
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_URL = 'URL is required'
@@ -15,14 +15,22 @@ module ProxyCrawl
15
15
  raise INVALID_TOKEN if options[:token].nil?
16
16
 
17
17
  @token = options[:token]
18
+ @timeout = options[:timeout] || 120
18
19
  end
19
20
 
20
21
  def get(url, options = {})
21
22
  raise INVALID_URL if url.empty?
22
23
 
23
24
  uri = prepare_uri(url, options)
25
+ req = Net::HTTP::Get.new(uri)
24
26
 
25
- response = Net::HTTP.get_response(uri)
27
+ req_options = {
28
+ read_timeout: timeout,
29
+ use_ssl: uri.scheme == 'https',
30
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
31
+ }
32
+
33
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
26
34
 
27
35
  prepare_response(response, options[:format])
28
36
 
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class LeadsAPI
9
- attr_reader :token, :body, :status_code, :success, :remaining_requests
9
+ attr_reader :token, :timeout, :body, :status_code, :success, :remaining_requests
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_DOMAIN = 'Domain is required'
@@ -15,6 +15,7 @@ module ProxyCrawl
15
15
  raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
16
16
 
17
17
  @token = options[:token]
18
+ @timeout = options[:timeout] || 120
18
19
  end
19
20
 
20
21
  def get(domain)
@@ -23,7 +24,15 @@ module ProxyCrawl
23
24
  uri = URI('https://api.proxycrawl.com/leads')
24
25
  uri.query = URI.encode_www_form({ token: token, domain: domain })
25
26
 
26
- response = Net::HTTP.get_response(uri)
27
+ req = Net::HTTP::Get.new(uri)
28
+
29
+ req_options = {
30
+ read_timeout: timeout,
31
+ use_ssl: uri.scheme == 'https',
32
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
33
+ }
34
+
35
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
27
36
  @status_code = response.code.to_i
28
37
  @body = response.body
29
38
 
@@ -6,7 +6,7 @@ require 'uri'
6
6
 
7
7
  module ProxyCrawl
8
8
  class StorageAPI
9
- attr_reader :token, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
9
+ attr_reader :token, :timeout, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_RID = 'RID is required'
@@ -18,6 +18,7 @@ module ProxyCrawl
18
18
  raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
19
19
 
20
20
  @token = options[:token]
21
+ @timeout = options[:timeout] || 120
21
22
  end
22
23
 
23
24
  def get(url_or_rid, format = 'html')
@@ -25,7 +26,16 @@ module ProxyCrawl
25
26
 
26
27
  uri = URI(BASE_URL)
27
28
  uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
28
- response = Net::HTTP.get_response(uri)
29
+
30
+ req = Net::HTTP::Get.new(uri)
31
+
32
+ req_options = {
33
+ read_timeout: timeout,
34
+ use_ssl: uri.scheme == 'https',
35
+ verify_mode: OpenSSL::SSL::VERIFY_NONE
36
+ }
37
+
38
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
29
39
 
30
40
  res = format == 'json' ? JSON.parse(response.body) : response
31
41
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ProxyCrawl
4
- VERSION = '0.3.1'
4
+ VERSION = '1.0.1'
5
5
  end
data/proxycrawl.gemspec CHANGED
@@ -28,4 +28,7 @@ Gem::Specification.new do |spec|
28
28
  spec.add_development_dependency "webmock", "~> 3.4"
29
29
  spec.add_development_dependency "bundler", "~> 2.0"
30
30
  spec.add_development_dependency "rake", "~> 12.3.3"
31
+
32
+ # Deprecation warning
33
+ spec.post_install_message = "DEPRECATION WARNING: This package is no longer maintained due to rebranding. Please use the 'crawlbase' gem instead. More info: https://github.com/crawlbase-source/crawlbase-ruby"
31
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxycrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - proxycrawl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-07-14 00:00:00.000000000 Z
11
+ date: 2023-07-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -94,7 +94,8 @@ homepage: https://github.com/proxycrawl/proxycrawl-ruby
94
94
  licenses:
95
95
  - MIT
96
96
  metadata: {}
97
- post_install_message:
97
+ post_install_message: 'DEPRECATION WARNING: This package is no longer maintained due
98
+ to rebranding. Please use the ''crawlbase'' gem instead. More info: https://github.com/crawlbase-source/crawlbase-ruby'
98
99
  rdoc_options: []
99
100
  require_paths:
100
101
  - lib
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
110
  - !ruby/object:Gem::Version
110
111
  version: '0'
111
112
  requirements: []
112
- rubygems_version: 3.1.4
113
+ rubygems_version: 3.1.2
113
114
  signing_key:
114
115
  specification_version: 4
115
116
  summary: ProxyCrawl API client for web scraping and crawling