webscraping_ai 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,12 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #A client for https://webscraping.ai API. It provides Chrome JS rendering, rotating proxies and HTML parsing for web scraping.
5
-
6
- The version of the OpenAPI document: 1.0.0
4
+ #A client for https://webscraping.ai API. It provides a web scraping automation API with Chrome JS rendering, rotating proxies and built-in HTML parsing.
7
5
 
6
+ The version of the OpenAPI document: 2.0.0
7
+ Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 4.2.3
9
+ OpenAPI Generator version: 4.3.1
10
10
 
11
11
  =end
12
12
 
@@ -269,11 +269,13 @@ module WebScrapingAI
269
269
  tempfile.write(chunk)
270
270
  end
271
271
  request.on_complete do |response|
272
- tempfile.close if tempfile
273
- @config.logger.info "Temp file written to #{tempfile.path}, please copy the file to a proper folder "\
274
- "with e.g. `FileUtils.cp(tempfile.path, '/new/file/path')` otherwise the temp file "\
275
- "will be deleted automatically with GC. It's also recommended to delete the temp file "\
276
- "explicitly with `tempfile.delete`"
272
+ if tempfile
273
+ tempfile.close
274
+ @config.logger.info "Temp file written to #{tempfile.path}, please copy the file to a proper folder "\
275
+ "with e.g. `FileUtils.cp(tempfile.path, '/new/file/path')` otherwise the temp file "\
276
+ "will be deleted automatically with GC. It's also recommended to delete the temp file "\
277
+ "explicitly with `tempfile.delete`"
278
+ end
277
279
  end
278
280
  end
279
281
 
@@ -1,12 +1,12 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #A client for https://webscraping.ai API. It provides Chrome JS rendering, rotating proxies and HTML parsing for web scraping.
5
-
6
- The version of the OpenAPI document: 1.0.0
4
+ #A client for https://webscraping.ai API. It provides a web scraping automation API with Chrome JS rendering, rotating proxies and built-in HTML parsing.
7
5
 
6
+ The version of the OpenAPI document: 2.0.0
7
+ Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 4.2.3
9
+ OpenAPI Generator version: 4.3.1
10
10
 
11
11
  =end
12
12
 
@@ -1,12 +1,12 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #A client for https://webscraping.ai API. It provides Chrome JS rendering, rotating proxies and HTML parsing for web scraping.
5
-
6
- The version of the OpenAPI document: 1.0.0
4
+ #A client for https://webscraping.ai API. It provides a web scraping automation API with Chrome JS rendering, rotating proxies and built-in HTML parsing.
7
5
 
6
+ The version of the OpenAPI document: 2.0.0
7
+ Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 4.2.3
9
+ OpenAPI Generator version: 4.3.1
10
10
 
11
11
  =end
12
12
 
@@ -127,8 +127,8 @@ module WebScrapingAI
127
127
 
128
128
  def initialize
129
129
  @scheme = 'https'
130
- @host = 'webscraping.ai'
131
- @base_path = '/api'
130
+ @host = 'api.webscraping.ai'
131
+ @base_path = ''
132
132
  @api_key = {}
133
133
  @api_key_prefix = {}
134
134
  @timeout = 0
@@ -207,7 +207,7 @@ module WebScrapingAI
207
207
  def server_settings
208
208
  [
209
209
  {
210
- url: "https://webscraping.ai/api/",
210
+ url: "https://api.webscraping.ai",
211
211
  description: "No description provided",
212
212
  }
213
213
  ]
@@ -0,0 +1,207 @@
1
+ =begin
2
+ #WebScraping.AI
3
+
4
+ #A client for https://webscraping.ai API. It provides a web scraping automation API with Chrome JS rendering, rotating proxies and built-in HTML parsing.
5
+
6
+ The version of the OpenAPI document: 2.0.0
7
+ Contact: support@webscraping.ai
8
+ Generated by: https://openapi-generator.tech
9
+ OpenAPI Generator version: 4.3.1
10
+
11
+ =end
12
+
13
+ require 'date'
14
+
15
module WebScrapingAI
  # Model carrying a single error description (+message+).
  #
  # Instances can be built directly with +new+ (strict: unknown attribute
  # names raise) or from a decoded JSON hash with +build_from_hash+
  # (lenient: unknown keys are ignored, values are coerced to the declared
  # OpenAPI type).
  class Error
    # Error description
    attr_accessor :message

    # Attribute mapping from ruby-style variable name to JSON key.
    # @return [Hash{Symbol => Symbol}]
    def self.attribute_map
      {
        :'message' => :'message'
      }
    end

    # Attribute type mapping (ruby-style attribute name to OpenAPI type name).
    # @return [Hash{Symbol => Symbol}]
    def self.openapi_types
      {
        :'message' => :'String'
      }
    end

    # List of attributes with nullable: true
    # @return [Set<Symbol>] empty for this model
    def self.openapi_nullable
      Set.new([])
    end

    # Initializes the object
    # @param [Hash] attributes Model attributes in the form of hash; keys may
    #   be Strings or Symbols but must name a known attribute
    # @raise [ArgumentError] if +attributes+ is not a Hash or contains a key
    #   that is not listed in +attribute_map+
    def initialize(attributes = {})
      unless attributes.is_a?(Hash)
        raise ArgumentError, "The input argument (attributes) must be a hash in `WebScrapingAI::Error` initialize method"
      end

      # check to see if the attribute exists and convert string to symbol for hash key
      known = self.class.attribute_map
      attributes = attributes.each_with_object({}) do |(k, v), h|
        unless known.key?(k.to_sym)
          raise ArgumentError, "`#{k}` is not a valid attribute in `WebScrapingAI::Error`. Please check the name to make sure it's valid. List of attributes: " + known.keys.inspect
        end
        h[k.to_sym] = v
      end

      self.message = attributes[:'message'] if attributes.key?(:'message')
    end

    # Show invalid properties with the reasons. Usually used together with valid?
    # @return [Array] always empty: this model declares no validations
    def list_invalid_properties
      []
    end

    # Check to see if the all the properties in the model are valid
    # @return [true] always, since this model declares no validations
    def valid?
      true
    end

    # Checks equality by comparing each attribute.
    # @param [Object] o Object to be compared
    # @return [Boolean]
    def ==(o)
      return true if equal?(o)

      self.class == o.class &&
        message == o.message
    end

    # @see the `==` method
    # @param [Object] o Object to be compared
    # @return [Boolean]
    def eql?(o)
      self == o
    end

    # Calculates hash code according to all attributes.
    # @return [Integer] Hash code
    def hash
      [message].hash
    end

    # Builds the object from hash
    # @param [Hash] attributes Model attributes in the form of hash
    # @return [Object, nil] the populated model, or nil when +attributes+ is
    #   not a Hash
    def self.build_from_hash(attributes)
      new.build_from_hash(attributes)
    end

    # Builds the object from hash
    # @param [Hash] attributes Model attributes in the form of hash, keyed by
    #   JSON key (String or Symbol)
    # @return [Object, nil] Returns the model itself, or nil when +attributes+
    #   is not a Hash
    def build_from_hash(attributes)
      return nil unless attributes.is_a?(Hash)

      # attribute_map values are Symbols, so a String-keyed hash (e.g. the
      # default output of JSON.parse) would otherwise match nothing and
      # silently yield an empty model. Normalize keys up front; keys that
      # cannot be symbolized are kept as-is and simply never match.
      attributes = attributes.each_with_object({}) do |(k, v), h|
        h[k.respond_to?(:to_sym) ? k.to_sym : k] = v
      end

      self.class.openapi_types.each_pair do |key, type|
        raw = attributes[self.class.attribute_map[key]]
        array_type = /\AArray<(.*)>/i.match(type.to_s)

        if array_type
          # the attribute is documented as an array; ignore the input unless
          # it actually is one
          send("#{key}=", raw.map { |v| _deserialize(array_type[1], v) }) if raw.is_a?(Array)
        elsif !raw.nil?
          send("#{key}=", _deserialize(type, raw))
        end # or else data not found in attributes(hash), not an issue as the data can be optional
      end

      self
    end

    # Deserializes the data based on type
    # @param [String, Symbol] type Data type name (e.g. :String, "Array<Integer>")
    # @param [Object] value Value to be deserialized
    # @return [Object] Deserialized data
    def _deserialize(type, value)
      case type.to_sym
      when :DateTime
        DateTime.parse(value)
      when :Date
        Date.parse(value)
      when :String
        value.to_s
      when :Integer
        value.to_i
      when :Float
        value.to_f
      when :Boolean
        # anything other than an explicit truthy token is treated as false
        if value.to_s =~ /\A(true|t|yes|y|1)\z/i
          true
        else
          false
        end
      when :Object
        # generic object (usually a Hash), return directly
        value
      when /\AArray<(?<inner_type>.+)>\z/
        inner_type = Regexp.last_match[:inner_type]
        value.map { |v| _deserialize(inner_type, v) }
      when /\AHash<(?<k_type>.+?), (?<v_type>.+)>\z/
        k_type = Regexp.last_match[:k_type]
        v_type = Regexp.last_match[:v_type]
        {}.tap do |hash|
          value.each do |k, v|
            hash[_deserialize(k_type, k)] = _deserialize(v_type, v)
          end
        end
      else # model
        # any other type name is resolved as a constant under WebScrapingAI
        WebScrapingAI.const_get(type).build_from_hash(value)
      end
    end

    # Returns the string representation of the object
    # @return [String] String presentation of the object
    def to_s
      to_hash.to_s
    end

    # to_body is an alias to to_hash (backward compatibility)
    # @return [Hash] Returns the object in the form of hash
    def to_body
      to_hash
    end

    # Returns the object in the form of hash
    # @return [Hash] Returns the object in the form of hash, keyed by JSON key
    def to_hash
      hash = {}
      self.class.attribute_map.each_pair do |attr, param|
        value = send(attr)
        if value.nil?
          # a nil value is emitted only for nullable attributes that were
          # explicitly assigned
          is_nullable = self.class.openapi_nullable.include?(attr)
          next if !is_nullable || !instance_variable_defined?(:"@#{attr}")
        end

        hash[param] = _to_hash(value)
      end
      hash
    end

    # Outputs non-array value in the form of hash
    # For object, use to_hash. Otherwise, just return the value
    # @param [Object] value Any valid value
    # @return [Object] Arrays and Hashes are converted element-wise (nil
    #   array elements are dropped); other values are returned as
    #   +value.to_hash+ when available, else unchanged
    def _to_hash(value)
      if value.is_a?(Array)
        value.compact.map { |v| _to_hash(v) }
      elsif value.is_a?(Hash)
        {}.tap do |hash|
          value.each { |k, v| hash[k] = _to_hash(v) }
        end
      elsif value.respond_to? :to_hash
        value.to_hash
      else
        value
      end
    end
  end
end
@@ -1,27 +1,21 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #A client for https://webscraping.ai API. It provides Chrome JS rendering, rotating proxies and HTML parsing for web scraping.
5
-
6
- The version of the OpenAPI document: 1.0.0
4
+ #A client for https://webscraping.ai API. It provides a web scraping automation API with Chrome JS rendering, rotating proxies and built-in HTML parsing.
7
5
 
6
+ The version of the OpenAPI document: 2.0.0
7
+ Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 4.2.3
9
+ OpenAPI Generator version: 4.3.1
10
10
 
11
11
  =end
12
12
 
13
13
  require 'date'
14
14
 
15
15
  module WebScrapingAI
16
- class ScrappedPage
17
- # Page HTML content size in bytes
18
- attr_accessor :size_bytes
19
-
20
- # HTML of the full page or a selected area
21
- attr_accessor :html
22
-
23
- # Response HTTP status code (200, 404, 302, etc)
24
- attr_accessor :status
16
+ class PageError
17
+ # Response HTTP status code (403, 500, etc)
18
+ attr_accessor :status_code
25
19
 
26
20
  # Response HTTP status message
27
21
  attr_accessor :status_message
@@ -29,19 +23,15 @@ module WebScrapingAI
29
23
  # Attribute mapping from ruby-style variable name to JSON key.
30
24
  def self.attribute_map
31
25
  {
32
- :'size_bytes' => :'size_bytes',
33
- :'html' => :'html',
34
- :'status' => :'status',
35
- :'status_message' => :'statusMessage'
26
+ :'status_code' => :'status_code',
27
+ :'status_message' => :'status_message'
36
28
  }
37
29
  end
38
30
 
39
31
  # Attribute type mapping.
40
32
  def self.openapi_types
41
33
  {
42
- :'size_bytes' => :'Integer',
43
- :'html' => :'String',
44
- :'status' => :'Integer',
34
+ :'status_code' => :'Integer',
45
35
  :'status_message' => :'String'
46
36
  }
47
37
  end
@@ -56,27 +46,19 @@ module WebScrapingAI
56
46
  # @param [Hash] attributes Model attributes in the form of hash
57
47
  def initialize(attributes = {})
58
48
  if (!attributes.is_a?(Hash))
59
- fail ArgumentError, "The input argument (attributes) must be a hash in `WebScrapingAI::ScrappedPage` initialize method"
49
+ fail ArgumentError, "The input argument (attributes) must be a hash in `WebScrapingAI::PageError` initialize method"
60
50
  end
61
51
 
62
52
  # check to see if the attribute exists and convert string to symbol for hash key
63
53
  attributes = attributes.each_with_object({}) { |(k, v), h|
64
54
  if (!self.class.attribute_map.key?(k.to_sym))
65
- fail ArgumentError, "`#{k}` is not a valid attribute in `WebScrapingAI::ScrappedPage`. Please check the name to make sure it's valid. List of attributes: " + self.class.attribute_map.keys.inspect
55
+ fail ArgumentError, "`#{k}` is not a valid attribute in `WebScrapingAI::PageError`. Please check the name to make sure it's valid. List of attributes: " + self.class.attribute_map.keys.inspect
66
56
  end
67
57
  h[k.to_sym] = v
68
58
  }
69
59
 
70
- if attributes.key?(:'size_bytes')
71
- self.size_bytes = attributes[:'size_bytes']
72
- end
73
-
74
- if attributes.key?(:'html')
75
- self.html = attributes[:'html']
76
- end
77
-
78
- if attributes.key?(:'status')
79
- self.status = attributes[:'status']
60
+ if attributes.key?(:'status_code')
61
+ self.status_code = attributes[:'status_code']
80
62
  end
81
63
 
82
64
  if attributes.key?(:'status_message')
@@ -102,9 +84,7 @@ module WebScrapingAI
102
84
  def ==(o)
103
85
  return true if self.equal?(o)
104
86
  self.class == o.class &&
105
- size_bytes == o.size_bytes &&
106
- html == o.html &&
107
- status == o.status &&
87
+ status_code == o.status_code &&
108
88
  status_message == o.status_message
109
89
  end
110
90
 
@@ -117,7 +97,7 @@ module WebScrapingAI
117
97
  # Calculates hash code according to all attributes.
118
98
  # @return [Integer] Hash code
119
99
  def hash
120
- [size_bytes, html, status, status_message].hash
100
+ [status_code, status_message].hash
121
101
  end
122
102
 
123
103
  # Builds the object from hash
@@ -1,15 +1,15 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #A client for https://webscraping.ai API. It provides Chrome JS rendering, rotating proxies and HTML parsing for web scraping.
5
-
6
- The version of the OpenAPI document: 1.0.0
4
+ #A client for https://webscraping.ai API. It provides a web scraping automation API with Chrome JS rendering, rotating proxies and built-in HTML parsing.
7
5
 
6
+ The version of the OpenAPI document: 2.0.0
7
+ Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 4.2.3
9
+ OpenAPI Generator version: 4.3.1
10
10
 
11
11
  =end
12
12
 
13
13
  module WebScrapingAI
14
- VERSION = '1.0.0'
14
+ VERSION = '2.0.0'
15
15
  end
@@ -1,43 +1,64 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #This is a sample server Petstore server. For this sample, you can use the api key `special-key` to test the authorization filters.
5
-
6
- The version of the OpenAPI document: 1.0.0
4
+ #A client for https://webscraping.ai API. It provides a web scraping automation API with Chrome JS rendering, rotating proxies and built-in HTML parsing.
7
5
 
6
+ The version of the OpenAPI document: 2.0.0
7
+ Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 4.2.3
9
+ OpenAPI Generator version: 4.3.1
10
10
 
11
11
  =end
12
12
 
13
13
  require 'spec_helper'
14
14
  require 'json'
15
15
 
16
- # Unit tests for WebScrapingAI::HtmlApi
16
+ # Unit tests for WebScrapingAI::HTMLApi
17
17
  # Automatically generated by openapi-generator (https://openapi-generator.tech)
18
18
  # Please update as you see appropriate
19
- describe 'HtmlApi' do
19
+ describe 'HTMLApi' do
20
20
  before do
21
21
  # run before each test
22
- @api_instance = WebScrapingAI::HtmlApi.new
22
+ @api_instance = WebScrapingAI::HTMLApi.new
23
23
  end
24
24
 
25
25
  after do
26
26
  # run after each test
27
27
  end
28
28
 
29
- describe 'test an instance of HtmlApi' do
30
- it 'should create an instance of HtmlApi' do
31
- expect(@api_instance).to be_instance_of(WebScrapingAI::HtmlApi)
29
+ describe 'test an instance of HTMLApi' do
30
+ it 'should create an instance of HTMLApi' do
31
+ expect(@api_instance).to be_instance_of(WebScrapingAI::HTMLApi)
32
+ end
33
+ end
34
+
35
+ # unit tests for get_html
36
+ # Page HTML by URL
37
+ # Returns just HTML on success, JSON on error
38
+ # @param url URL of the target page
39
+ # @param [Hash] opts the optional parameters
40
+ # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"})
41
+ # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
42
+ # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
43
+ # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
44
+ # @return [nil]
45
+ describe 'get_html test' do
46
+ it 'should work' do
47
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
32
48
  end
33
49
  end
34
50
 
35
- # unit tests for get_page
36
- # Get page HTML by URL
37
- # @param url URL of the page to get
51
+ # unit tests for post_html
52
+ # Page HTML by URL with POST request to the target page
53
+ # Returns just HTML on success, JSON on error. Request body will be passed to the target page.
54
+ # @param url URL of the target page
38
55
  # @param [Hash] opts the optional parameters
39
- # @return [ScrappedPage]
40
- describe 'get_page test' do
56
+ # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"})
57
+ # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
58
+ # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
59
+ # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
60
+ # @return [nil]
61
+ describe 'post_html test' do
41
62
  it 'should work' do
42
63
  # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
43
64
  end