spidercloud 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,240 @@
1
+ # Screenshot Endpoint
2
+
3
+ The Screenshot endpoint captures screenshots of web pages with full JavaScript
4
+ rendering support.
5
+
6
+ **API Reference:** https://spider.cloud/docs/api#screenshot
7
+
8
+ ## Basic Usage
9
+
10
+ ```ruby
11
+ response = SpiderCloud.screenshot( 'https://example.com' )
12
+
13
+ # Save to file
14
+ response.result.save_to( 'screenshot.png' )
15
+
16
+ # Or access raw image data
17
+ image_data = response.result.image_data
18
+ ```
19
+
20
+ ## With Options
21
+
22
+ ```ruby
23
+ options = SpiderCloud::ScreenshotOptions.build do
24
+ full_page true
25
+ viewport do
26
+ width 1920
27
+ height 1080
28
+ end
29
+ end
30
+
31
+ response = SpiderCloud.screenshot( 'https://example.com', options )
32
+ ```
33
+
34
+ ## Options Reference
35
+
36
+ ### Screenshot Options
37
+
38
+ | Option | Type | Default | Description |
39
+ |--------|------|---------|-------------|
40
+ | `full_page` | Boolean | true | Capture full scrollable page |
41
+ | `binary` | Boolean | false | Return raw binary image data instead of a base64-encoded string |
42
+ | `omit_background` | Boolean | false | Transparent background (PNG only) |
43
+ | `block_images` | Boolean | false | Block images for faster capture |
44
+
45
+ ### CDP Parameters
46
+
47
+ Chrome DevTools Protocol parameters for advanced control:
48
+
49
+ ```ruby
50
+ cdp_params do
51
+ format :png # :png or :jpeg
52
+   quality 80               # JPEG quality (0-100); only applies when format is :jpeg
53
+ from_surface true
54
+ capture_beyond_viewport true
55
+ clip do
56
+ x 0
57
+ y 0
58
+ width 800
59
+ height 600
60
+ scale 1
61
+ end
62
+ end
63
+ ```
64
+
65
+ ### Viewport & Device
66
+
67
+ | Option | Type | Description |
68
+ |--------|------|-------------|
69
+ | `viewport` | Hash | Browser viewport `{width:, height:}` |
70
+ | `device` | Symbol | Device: `:mobile`, `:tablet`, `:desktop` |
71
+
72
+ ### Wait Conditions
73
+
74
+ ```ruby
75
+ wait_for do
76
+ # Wait for CSS selector
77
+ selector '#loaded'
78
+
79
+ # Wait for network idle
80
+ idle_network do
81
+ timeout { seconds 5; nanoseconds 0 }
82
+ end
83
+
84
+ # Wait for delay
85
+ delay do
86
+ timeout { seconds 2; nanoseconds 0 }
87
+ end
88
+ end
89
+ ```
90
+
91
+ ### Browser Configuration
92
+
93
+ | Option | Type | Description |
94
+ |--------|------|-------------|
95
+ | `stealth` | Boolean | Stealth mode |
96
+ | `fingerprint` | Boolean | Use fingerprint detection |
97
+ | `scroll` | Integer | Scroll duration before capture (ms) |
98
+ | `block_ads` | Boolean | Block ads |
99
+ | `virtual_display` | Boolean | Use virtual display |
100
+
101
+ ### Proxy Configuration
102
+
103
+ | Option | Type | Description |
104
+ |--------|------|-------------|
105
+ | `proxy` | Symbol | Proxy pool: `:residential`, `:mobile`, `:isp` |
106
+ | `proxy_enabled` | Boolean | Enable proxy |
107
+ | `country_code` | String | Two-letter ISO 3166-1 country code (e.g. `US`) |
108
+
109
+ ### Authentication
110
+
111
+ | Option | Type | Description |
112
+ |--------|------|-------------|
113
+ | `cookies` | String | HTTP cookies |
114
+ | `headers` | Hash | Custom HTTP headers |
115
+ | `automation_scripts` | Hash | Automation scripts keyed by URL path |
116
+
117
+ ## Response
118
+
119
+ ```ruby
120
+ response = SpiderCloud.screenshot( 'https://example.com' )
121
+
122
+ response.result.success? # => true
123
+ response.result.content # => "iVBORw0KGgo..." (base64)
124
+ response.result.image_data # => binary PNG/JPEG data
125
+ response.result.url # => "https://example.com"
126
+ response.result.status # => 200
127
+
128
+ # Save directly to file
129
+ response.result.save_to( 'screenshot.png' )
130
+ ```
131
+
132
+ ## Examples
133
+
134
+ ### Full Page Screenshot
135
+
136
+ ```ruby
137
+ options = SpiderCloud::ScreenshotOptions.build do
138
+ full_page true
139
+ end
140
+
141
+ response = SpiderCloud.screenshot( 'https://example.com', options )
142
+ response.result.save_to( 'full-page.png' )
143
+ ```
144
+
145
+ ### Viewport Screenshot
146
+
147
+ ```ruby
148
+ options = SpiderCloud::ScreenshotOptions.build do
149
+ full_page false
150
+ viewport do
151
+ width 1280
152
+ height 720
153
+ end
154
+ end
155
+
156
+ response = SpiderCloud.screenshot( 'https://example.com', options )
157
+ ```
158
+
159
+ ### Mobile Screenshot
160
+
161
+ ```ruby
162
+ options = SpiderCloud::ScreenshotOptions.build do
163
+ device :mobile
164
+ viewport do
165
+ width 375
166
+ height 812
167
+ end
168
+ end
169
+
170
+ response = SpiderCloud.screenshot( 'https://example.com', options )
171
+ ```
172
+
173
+ ### JPEG with Quality
174
+
175
+ ```ruby
176
+ options = SpiderCloud::ScreenshotOptions.build do
177
+ cdp_params do
178
+ format :jpeg
179
+ quality 85
180
+ end
181
+ end
182
+
183
+ response = SpiderCloud.screenshot( 'https://example.com', options )
184
+ response.result.save_to( 'screenshot.jpg' )
185
+ ```
186
+
187
+ ### Capture Specific Region
188
+
189
+ ```ruby
190
+ options = SpiderCloud::ScreenshotOptions.build do
191
+ cdp_params do
192
+ clip do
193
+ x 100
194
+ y 100
195
+ width 400
196
+ height 300
197
+ scale 1
198
+ end
199
+ end
200
+ end
201
+
202
+ response = SpiderCloud.screenshot( 'https://example.com', options )
203
+ ```
204
+
205
+ ### Wait for Content
206
+
207
+ ```ruby
208
+ options = SpiderCloud::ScreenshotOptions.build do
209
+ wait_for do
210
+ selector '.chart-loaded'
211
+ end
212
+ end
213
+
214
+ response = SpiderCloud.screenshot( 'https://example.com/dashboard', options )
215
+ ```
216
+
217
+ ### With Proxy
218
+
219
+ ```ruby
220
+ options = SpiderCloud::ScreenshotOptions.build do
221
+ proxy :residential
222
+ proxy_enabled true
223
+     country_code 'GB'
224
+ end
225
+
226
+ response = SpiderCloud.screenshot( 'https://example.com', options )
227
+ ```
228
+
229
+ ### Transparent Background
230
+
231
+ ```ruby
232
+ options = SpiderCloud::ScreenshotOptions.build do
233
+ omit_background true
234
+ cdp_params do
235
+ format :png
236
+ end
237
+ end
238
+
239
+ response = SpiderCloud.screenshot( 'https://example.com', options )
240
+ ```
@@ -0,0 +1,40 @@
1
+ require_relative 'lib/spider_cloud/version'
2
+
3
+ Gem::Specification.new do | spec |
4
+
5
+ spec.name = 'spidercloud'
6
+ spec.version = SpiderCloud::VERSION
7
+ spec.authors = [ 'Kristoph Cichocki-Romanov' ]
8
+ spec.email = [ 'rubygems.org@kristoph.net' ]
9
+
10
+ spec.summary =
11
+ "The SpiderCloud gem implements a lightweight interface to the Spider Cloud API for " \
12
+ "web scraping, crawling, screenshots, and link extraction."
13
+ spec.description =
14
+ "The SpiderCloud gem implements a lightweight interface to the Spider Cloud API. Spider " \
15
+ "Cloud provides powerful web scraping and crawling capabilities with support for " \
16
+ "JavaScript rendering, proxy rotation, and anti-bot measures.\n" \
17
+ "\n" \
18
+ "This gem supports scrape, crawl, screenshot, and links endpoints with comprehensive " \
19
+ "options for content extraction, filtering, and automation."
20
+ spec.license = 'MIT'
21
+ spec.homepage = 'https://github.com/EndlessInternational/spider-cloud'
22
+ spec.metadata = {
23
+ 'source_code_uri' => 'https://github.com/EndlessInternational/spider-cloud',
24
+ 'bug_tracker_uri' => 'https://github.com/EndlessInternational/spider-cloud/issues',
25
+ }
26
+
27
+ spec.required_ruby_version = '>= 3.0'
28
+ spec.files = Dir[ "lib/**/*.rb", "readme/**/*.md", "LICENSE", "README.md",
29
+ "spidercloud.gemspec" ]
30
+ spec.require_paths = [ "lib" ]
31
+
32
+ spec.add_runtime_dependency 'faraday', '~> 2'
33
+ spec.add_runtime_dependency 'dynamicschema', '~> 2'
34
+ spec.add_runtime_dependency 'base64', '~> 0.2'
35
+
36
+ spec.add_development_dependency 'minitest', '~> 6'
37
+ spec.add_development_dependency 'debug', '~> 1.11'
38
+ spec.add_development_dependency 'vcr', '~> 6.4'
39
+
40
+ end
metadata ADDED
@@ -0,0 +1,159 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spidercloud
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Kristoph Cichocki-Romanov
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: faraday
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '2'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '2'
26
+ - !ruby/object:Gem::Dependency
27
+ name: dynamicschema
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '2'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2'
40
+ - !ruby/object:Gem::Dependency
41
+ name: base64
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.2'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '0.2'
54
+ - !ruby/object:Gem::Dependency
55
+ name: minitest
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '6'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '6'
68
+ - !ruby/object:Gem::Dependency
69
+ name: debug
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.11'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.11'
82
+ - !ruby/object:Gem::Dependency
83
+ name: vcr
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '6.4'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '6.4'
96
+ description: |-
97
+ The SpiderCloud gem implements a lightweight interface to the Spider Cloud API. Spider Cloud provides powerful web scraping and crawling capabilities with support for JavaScript rendering, proxy rotation, and anti-bot measures.
98
+
99
+ This gem supports scrape, crawl, screenshot, and links endpoints with comprehensive options for content extraction, filtering, and automation.
100
+ email:
101
+ - rubygems.org@kristoph.net
102
+ executables: []
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - LICENSE
107
+ - README.md
108
+ - lib/spider_cloud.rb
109
+ - lib/spider_cloud/costs.rb
110
+ - lib/spider_cloud/crawl_options.rb
111
+ - lib/spider_cloud/crawl_request.rb
112
+ - lib/spider_cloud/crawl_result.rb
113
+ - lib/spider_cloud/error_result.rb
114
+ - lib/spider_cloud/helpers.rb
115
+ - lib/spider_cloud/links_options.rb
116
+ - lib/spider_cloud/links_request.rb
117
+ - lib/spider_cloud/links_result.rb
118
+ - lib/spider_cloud/module_methods.rb
119
+ - lib/spider_cloud/request.rb
120
+ - lib/spider_cloud/response_methods.rb
121
+ - lib/spider_cloud/scrape_options.rb
122
+ - lib/spider_cloud/scrape_request.rb
123
+ - lib/spider_cloud/scrape_result.rb
124
+ - lib/spider_cloud/screenshot_options.rb
125
+ - lib/spider_cloud/screenshot_request.rb
126
+ - lib/spider_cloud/screenshot_result.rb
127
+ - lib/spider_cloud/shared_schemas.rb
128
+ - lib/spider_cloud/version.rb
129
+ - lib/spidercloud.rb
130
+ - readme/crawl.md
131
+ - readme/links.md
132
+ - readme/scrape.md
133
+ - readme/screenshot.md
134
+ - spidercloud.gemspec
135
+ homepage: https://github.com/EndlessInternational/spider-cloud
136
+ licenses:
137
+ - MIT
138
+ metadata:
139
+ source_code_uri: https://github.com/EndlessInternational/spider-cloud
140
+ bug_tracker_uri: https://github.com/EndlessInternational/spider-cloud/issues
141
+ rdoc_options: []
142
+ require_paths:
143
+ - lib
144
+ required_ruby_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: '3.0'
149
+ required_rubygems_version: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - ">="
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ requirements: []
155
+ rubygems_version: 3.6.7
156
+ specification_version: 4
157
+ summary: The SpiderCloud gem implements a lightweight interface to the Spider Cloud
158
+ API for web scraping, crawling, screenshots, and link extraction.
159
+ test_files: []