crawlberg 0.0.1 → 1.0.0.pre.rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,494 @@
1
+ # This file is auto-generated by alef — DO NOT EDIT.
2
+ # alef:hash:cc01a1c094d3c52a5b52b4a912620f9a716410d5ef08dea3520ea2a80cd328ad
3
+ # To regenerate: alef generate
4
+ # To verify freshness: alef verify --exit-code
5
+ # frozen_string_literal: true
6
+
7
+ require "json"
8
+ require "sorbet-runtime"
9
+ require "crawlberg_rb"
10
+ module Crawlberg
11
# Authentication configuration.
#
# Interface mixed into the variant value classes AuthConfigBasic,
# AuthConfigBearer and AuthConfigHeader. `.from_hash` selects the variant
# from the hash's `type` discriminator.
module AuthConfig
  extend T::Helpers
  extend T::Sig

  interface!

  # Builds the variant named by the hash's `type` entry.
  #
  # The discriminator is read from the `:type` symbol key, falling back to the
  # `"type"` string key when the symbol key is absent, nil or false.
  #
  # @param hash [Hash] discriminator plus the variant-specific fields
  # @return [AuthConfigBasic, AuthConfigBearer, AuthConfigHeader]
  # @raise [RuntimeError] when the discriminator names no known variant
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) }
  def self.from_hash(hash)
    tag = hash[:type] || hash["type"]
    variant =
      case tag
      when "basic" then AuthConfigBasic
      when "bearer" then AuthConfigBearer
      when "header" then AuthConfigHeader
      else raise "Unknown discriminator: #{tag}"
      end
    variant.from_hash(hash)
  end
end
32
## HTTP Basic authentication.
##
## Immutable value object for the `"basic"` variant of AuthConfig.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member readers on this same class,
# so the typed overrides below would replace them and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
#
# NOTE: Data#inspect prints every member, so the password shows up in
# inspect output.
class AuthConfigBasic < Data.define(:username, :password) # rubocop:disable Style/DataInheritance
  include AuthConfig
  extend T::Sig

  # Username sent in the `Authorization: Basic` header.
  sig { returns(String) }
  def username = super # rubocop:disable Lint/UselessMethodDefinition

  # Password sent in the `Authorization: Basic` header.
  sig { returns(String) }
  def password = super # rubocop:disable Lint/UselessMethodDefinition

  # @return [Boolean] always true: this is the basic variant
  sig { returns(T::Boolean) }
  def basic? = true

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def bearer? = false

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def header? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first for each field.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(
      username: hash[:username] || hash["username"],
      password: hash[:password] || hash["password"]
    )
  end
end
56
## Bearer token authentication.
##
## Immutable value object for the `"bearer"` variant of AuthConfig.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member reader on this same class,
# so the typed override below would replace it and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class AuthConfigBearer < Data.define(:token) # rubocop:disable Style/DataInheritance
  include AuthConfig
  extend T::Sig

  # Token sent in the `Authorization: Bearer` header.
  sig { returns(String) }
  def token = super # rubocop:disable Lint/UselessMethodDefinition

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def basic? = false

  # @return [Boolean] always true: this is the bearer variant
  sig { returns(T::Boolean) }
  def bearer? = true

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def header? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(token: hash[:token] || hash["token"])
  end
end
77
## Custom authentication header.
##
## Immutable value object for the `"header"` variant of AuthConfig.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member readers on this same class,
# so the typed overrides below would replace them and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class AuthConfigHeader < Data.define(:name, :value) # rubocop:disable Style/DataInheritance
  include AuthConfig
  extend T::Sig

  # HTTP header name to set on each request.
  sig { returns(String) }
  def name = super # rubocop:disable Lint/UselessMethodDefinition

  # HTTP header value to send.
  sig { returns(String) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def basic? = false

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def bearer? = false

  # @return [Boolean] always true: this is the header variant
  sig { returns(T::Boolean) }
  def header? = true

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first for each field.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(
      name: hash[:name] || hash["name"],
      value: hash[:value] || hash["value"]
    )
  end
end
101
+ end
102
+
103
+ module Crawlberg
104
# An event emitted during a streaming crawl operation.
#
# Not available on `wasm32` targets — streaming requires native concurrency
# primitives (tokio channels, `JoinSet`) that are not supported on wasm32.
#
# Delivered to bindings through each target's native streaming idiom.
#
# Interface mixed into CrawlEventPage, CrawlEventError and
# CrawlEventComplete; `.from_hash` selects the variant from the hash's
# `type` discriminator.
module CrawlEvent
  extend T::Helpers
  extend T::Sig

  interface!

  # Builds the variant named by the hash's `type` entry.
  #
  # The discriminator is read from the `:type` symbol key, falling back to the
  # `"type"` string key when the symbol key is absent, nil or false.
  #
  # @param hash [Hash] discriminator plus the variant-specific fields
  # @return [CrawlEventPage, CrawlEventError, CrawlEventComplete]
  # @raise [RuntimeError] when the discriminator names no known variant
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) }
  def self.from_hash(hash)
    tag = hash[:type] || hash["type"]
    variant =
      case tag
      when "page" then CrawlEventPage
      when "error" then CrawlEventError
      when "complete" then CrawlEventComplete
      else raise "Unknown discriminator: #{tag}"
      end
    variant.from_hash(hash)
  end
end
130
## A single page has been crawled.
##
## Immutable value object for the `"page"` variant of CrawlEvent.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member reader on this same class,
# so the typed override below would replace it and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class CrawlEventPage < Data.define(:result) # rubocop:disable Style/DataInheritance
  include CrawlEvent
  extend T::Sig

  # The crawled page result.
  sig { returns(CrawlPageResult) }
  def result = super # rubocop:disable Lint/UselessMethodDefinition

  # @return [Boolean] always true: this is the page variant
  sig { returns(T::Boolean) }
  def page? = true

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def error? = false

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def complete? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first.
  #
  # NOTE(review): the `result` value is stored exactly as received. If the
  # native extension hands over a nested Hash rather than a CrawlPageResult,
  # it is not converted here — confirm against the extension.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(result: hash[:result] || hash["result"])
  end
end
151
## An error occurred while crawling a URL.
##
## Immutable value object for the `"error"` variant of CrawlEvent.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member readers on this same class,
# so the typed overrides below would replace them and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class CrawlEventError < Data.define(:url, :error) # rubocop:disable Style/DataInheritance
  include CrawlEvent
  extend T::Sig

  # The URL that failed.
  sig { returns(String) }
  def url = super # rubocop:disable Lint/UselessMethodDefinition

  # The error message.
  sig { returns(String) }
  def error = super # rubocop:disable Lint/UselessMethodDefinition

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def page? = false

  # @return [Boolean] always true: this is the error variant
  sig { returns(T::Boolean) }
  def error? = true

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def complete? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first for each field.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(
      url: hash[:url] || hash["url"],
      error: hash[:error] || hash["error"]
    )
  end
end
175
## The crawl has completed.
##
## Immutable value object for the `"complete"` variant of CrawlEvent.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member reader on this same class,
# so the typed override below would replace it and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class CrawlEventComplete < Data.define(:pages_crawled) # rubocop:disable Style/DataInheritance
  include CrawlEvent
  extend T::Sig

  # Total number of pages crawled.
  sig { returns(Integer) }
  def pages_crawled = super # rubocop:disable Lint/UselessMethodDefinition

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def page? = false

  # @return [Boolean] always false for this variant
  sig { returns(T::Boolean) }
  def error? = false

  # @return [Boolean] always true: this is the complete variant
  sig { returns(T::Boolean) }
  def complete? = true

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first. A count of 0 is kept as-is because 0 is truthy in Ruby.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(pages_crawled: hash[:pages_crawled] || hash["pages_crawled"])
  end
end
196
+ end
197
+
198
+ module Crawlberg
199
# A single page interaction action.
#
# Actions are serialized with a `type` tag using camelCase naming,
# except `ExecuteJs` which is explicitly renamed to `"executeJs"`.
#
# Interface mixed into the PageAction* variant value classes; `.from_hash`
# selects the variant from the hash's `type` discriminator.
module PageAction
  extend T::Helpers
  extend T::Sig

  interface!

  # Builds the variant named by the hash's `type` entry.
  #
  # The discriminator is read from the `:type` symbol key, falling back to the
  # `"type"` string key when the symbol key is absent, nil or false. Note that
  # the `"type"` tag value maps to PageActionTypeText.
  #
  # @param hash [Hash] discriminator plus the variant-specific fields
  # @return [PageAction] an instance of the matching variant class
  # @raise [RuntimeError] when the discriminator names no known variant
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) }
  def self.from_hash(hash)
    tag = hash[:type] || hash["type"]
    variant =
      case tag
      when "click" then PageActionClick
      when "type" then PageActionTypeText
      when "press" then PageActionPress
      when "scroll" then PageActionScroll
      when "wait" then PageActionWait
      when "screenshot" then PageActionScreenshot
      when "executeJs" then PageActionExecuteJs
      when "scrape" then PageActionScrape
      else raise "Unknown discriminator: #{tag}"
      end
    variant.from_hash(hash)
  end
end
228
## Click on an element matching the given CSS selector.
##
## Immutable value object for the `"click"` variant of PageAction.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member reader on this same class,
# so the typed override below would replace it and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class PageActionClick < Data.define(:selector) # rubocop:disable Style/DataInheritance
  include PageAction
  extend T::Sig

  # CSS selector for the element to click.
  sig { returns(String) }
  def selector = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `click?` is true for this class.
  sig { returns(T::Boolean) }
  def click? = true

  sig { returns(T::Boolean) }
  def type_text? = false

  sig { returns(T::Boolean) }
  def press? = false

  sig { returns(T::Boolean) }
  def scroll? = false

  sig { returns(T::Boolean) }
  def wait? = false

  sig { returns(T::Boolean) }
  def screenshot? = false

  sig { returns(T::Boolean) }
  def execute_js? = false

  sig { returns(T::Boolean) }
  def scrape? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(selector: hash[:selector] || hash["selector"])
  end
end
259
## Type text into an element matching the given CSS selector.
##
## Immutable value object for the `"type"` variant of PageAction.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member readers on this same class,
# so the typed overrides below would replace them and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class PageActionTypeText < Data.define(:selector, :text) # rubocop:disable Style/DataInheritance
  include PageAction
  extend T::Sig

  # CSS selector for the input element.
  sig { returns(String) }
  def selector = super # rubocop:disable Lint/UselessMethodDefinition

  # Text to type into the element.
  sig { returns(String) }
  def text = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `type_text?` is true for this class.
  sig { returns(T::Boolean) }
  def click? = false

  sig { returns(T::Boolean) }
  def type_text? = true

  sig { returns(T::Boolean) }
  def press? = false

  sig { returns(T::Boolean) }
  def scroll? = false

  sig { returns(T::Boolean) }
  def wait? = false

  sig { returns(T::Boolean) }
  def screenshot? = false

  sig { returns(T::Boolean) }
  def execute_js? = false

  sig { returns(T::Boolean) }
  def scrape? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first for each field.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(
      selector: hash[:selector] || hash["selector"],
      text: hash[:text] || hash["text"]
    )
  end
end
293
## Press a keyboard key (e.g. "Enter", "Tab", "Escape").
##
## Immutable value object for the `"press"` variant of PageAction.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member reader on this same class,
# so the typed override below would replace it and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class PageActionPress < Data.define(:key) # rubocop:disable Style/DataInheritance
  include PageAction
  extend T::Sig

  # Key name to press.
  sig { returns(String) }
  def key = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `press?` is true for this class.
  sig { returns(T::Boolean) }
  def click? = false

  sig { returns(T::Boolean) }
  def type_text? = false

  sig { returns(T::Boolean) }
  def press? = true

  sig { returns(T::Boolean) }
  def scroll? = false

  sig { returns(T::Boolean) }
  def wait? = false

  sig { returns(T::Boolean) }
  def screenshot? = false

  sig { returns(T::Boolean) }
  def execute_js? = false

  sig { returns(T::Boolean) }
  def scrape? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(key: hash[:key] || hash["key"])
  end
end
324
## Scroll the page or a specific element.
##
## Immutable value object for the `"scroll"` variant of PageAction.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member readers on this same class,
# so the typed overrides below would replace them and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class PageActionScroll < Data.define(:direction, :selector, :amount) # rubocop:disable Style/DataInheritance
  include PageAction
  extend T::Sig

  # Direction to scroll.
  sig { returns(ScrollDirection) }
  def direction = super # rubocop:disable Lint/UselessMethodDefinition

  # Optional CSS selector for a scrollable element. Scrolls the page if absent.
  sig { returns(T.nilable(String)) }
  def selector = super # rubocop:disable Lint/UselessMethodDefinition

  # Optional pixel amount to scroll. Uses a default if absent.
  sig { returns(T.nilable(Integer)) }
  def amount = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `scroll?` is true for this class.
  sig { returns(T::Boolean) }
  def click? = false

  sig { returns(T::Boolean) }
  def type_text? = false

  sig { returns(T::Boolean) }
  def press? = false

  sig { returns(T::Boolean) }
  def scroll? = true

  sig { returns(T::Boolean) }
  def wait? = false

  sig { returns(T::Boolean) }
  def screenshot? = false

  sig { returns(T::Boolean) }
  def execute_js? = false

  sig { returns(T::Boolean) }
  def scrape? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first for each field. Missing optional fields become nil.
  #
  # NOTE(review): `direction` is stored exactly as received; no conversion to
  # ScrollDirection happens here — confirm what the native extension supplies.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(
      direction: hash[:direction] || hash["direction"],
      selector: hash[:selector] || hash["selector"],
      amount: hash[:amount] || hash["amount"]
    )
  end
end
361
## Wait for a duration or for an element to appear.
##
## Immutable value object for the `"wait"` variant of PageAction.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member readers on this same class,
# so the typed overrides below would replace them and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class PageActionWait < Data.define(:milliseconds, :selector) # rubocop:disable Style/DataInheritance
  include PageAction
  extend T::Sig

  # Milliseconds to wait. Ignored if `selector` is provided.
  sig { returns(T.nilable(Integer)) }
  def milliseconds = super # rubocop:disable Lint/UselessMethodDefinition

  # CSS selector to wait for.
  sig { returns(T.nilable(String)) }
  def selector = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `wait?` is true for this class.
  sig { returns(T::Boolean) }
  def click? = false

  sig { returns(T::Boolean) }
  def type_text? = false

  sig { returns(T::Boolean) }
  def press? = false

  sig { returns(T::Boolean) }
  def scroll? = false

  sig { returns(T::Boolean) }
  def wait? = true

  sig { returns(T::Boolean) }
  def screenshot? = false

  sig { returns(T::Boolean) }
  def execute_js? = false

  sig { returns(T::Boolean) }
  def scrape? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first for each field. Missing optional fields become nil.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(
      milliseconds: hash[:milliseconds] || hash["milliseconds"],
      selector: hash[:selector] || hash["selector"]
    )
  end
end
395
## Take a screenshot of the current page.
##
## Immutable value object for the `"screenshot"` variant of PageAction.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member reader on this same class,
# so the typed override below would replace it and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class PageActionScreenshot < Data.define(:full_page) # rubocop:disable Style/DataInheritance
  include PageAction
  extend T::Sig

  # Keys probed by .from_hash, in priority order: the snake_case form first
  # (symbol, then string), then the canonical camelCase form.
  FULL_PAGE_KEYS = [:full_page, "full_page", :fullPage, "fullPage"].freeze
  private_constant :FULL_PAGE_KEYS

  # Whether to capture the full scrollable page. Defaults to viewport only.
  #
  # Accepts both the canonical `fullPage` (camelCase) form and the
  # `full_page` (snake_case) alias so language bindings and fixtures can
  # use either convention without error.
  sig { returns(T.nilable(T::Boolean)) }
  def full_page = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `screenshot?` is true for this class.
  sig { returns(T::Boolean) }
  def click? = false

  sig { returns(T::Boolean) }
  def type_text? = false

  sig { returns(T::Boolean) }
  def press? = false

  sig { returns(T::Boolean) }
  def scroll? = false

  sig { returns(T::Boolean) }
  def wait? = false

  sig { returns(T::Boolean) }
  def screenshot? = true

  sig { returns(T::Boolean) }
  def execute_js? = false

  sig { returns(T::Boolean) }
  def scrape? = false

  # Builds an instance from a Hash keyed by symbols or strings, in either
  # `full_page` or `fullPage` spelling.
  #
  # The first key holding a non-nil value wins. Chaining lookups with `||`
  # would discard an explicit `false` and report nil instead, so presence is
  # tested with `nil?`.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self] with `full_page` nil when no key carries a value
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    key = FULL_PAGE_KEYS.find { |candidate| !hash[candidate].nil? }
    new(full_page: key.nil? ? nil : hash[key])
  end
end
430
## Execute arbitrary JavaScript in the page context.
##
## # Safety
##
## The script runs with full page privileges in the browser context.
## Only execute scripts from trusted sources.
##
## Immutable value object for the `"executeJs"` variant of PageAction.
#
# Subclasses the generated Data class rather than passing a block to
# Data.define: the block form defines the member reader on this same class,
# so the typed override below would replace it and `super` would raise
# NoMethodError. With a subclass, `super` reaches the generated reader.
class PageActionExecuteJs < Data.define(:script) # rubocop:disable Style/DataInheritance
  include PageAction
  extend T::Sig

  # JavaScript source code to execute. Max 1 MB.
  # NOTE(review): the size limit is not enforced in this class — presumably
  # checked by the native extension; confirm.
  sig { returns(String) }
  def script = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `execute_js?` is true for this class.
  sig { returns(T::Boolean) }
  def click? = false

  sig { returns(T::Boolean) }
  def type_text? = false

  sig { returns(T::Boolean) }
  def press? = false

  sig { returns(T::Boolean) }
  def scroll? = false

  sig { returns(T::Boolean) }
  def wait? = false

  sig { returns(T::Boolean) }
  def screenshot? = false

  sig { returns(T::Boolean) }
  def execute_js? = true

  sig { returns(T::Boolean) }
  def scrape? = false

  # Builds an instance from a Hash keyed by symbols or strings; the symbol
  # key is tried first.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new(script: hash[:script] || hash["script"])
  end
end
466
## Scrape the current page HTML.
##
## Member-less immutable value object for the `"scrape"` variant of
## PageAction.
PageActionScrape = Data.define do
  include PageAction
  extend T::Sig

  # Variant predicates: only `scrape?` is true for this class.
  sig { returns(T::Boolean) }
  def click?
    false
  end

  sig { returns(T::Boolean) }
  def type_text?
    false
  end

  sig { returns(T::Boolean) }
  def press?
    false
  end

  sig { returns(T::Boolean) }
  def scroll?
    false
  end

  sig { returns(T::Boolean) }
  def wait?
    false
  end

  sig { returns(T::Boolean) }
  def screenshot?
    false
  end

  sig { returns(T::Boolean) }
  def execute_js?
    false
  end

  sig { returns(T::Boolean) }
  def scrape?
    true
  end

  # Builds an instance; the variant has no fields, so the hash contents are
  # not read.
  #
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    new
  end
end
494
+ end
@@ -0,0 +1,10 @@
1
+ # This file is auto-generated by alef — DO NOT EDIT.
2
+ # alef:hash:cc01a1c094d3c52a5b52b4a912620f9a716410d5ef08dea3520ea2a80cd328ad
3
+ # To regenerate: alef generate
4
+ # To verify freshness: alef verify --exit-code
5
+ # frozen_string_literal: true
6
+
7
module Crawlberg
  # Version string of the crawlberg package.
  VERSION = "1.0.0.pre.rc.2"
end
data/lib/crawlberg.rb CHANGED
@@ -1,6 +1,18 @@
1
+ # This file is auto-generated by alef — DO NOT EDIT.
2
+ # alef:hash:cc01a1c094d3c52a5b52b4a912620f9a716410d5ef08dea3520ea2a80cd328ad
3
+ # To regenerate: alef generate
4
+ # To verify freshness: alef verify --exit-code
1
5
  # frozen_string_literal: true
2
6
 
3
- # Reserved name for crawlberg. See https://github.com/xberg-io/crawlberg
7
+ require_relative "crawlberg/version"
8
+ require_relative "crawlberg/native"
9
+
10
+ # Top-level namespace for the Crawlberg Ruby binding.
11
+ #
12
+ # All type and function symbols are re-exported from the native extension
13
+ # loaded via `require_relative "crawlberg/native"`. See
14
+ # {file:README.md} for usage examples and the upstream documentation
15
+ # for the full API reference.
4
16
  module Crawlberg
5
- VERSION = "0.0.1"
17
+ # Re-export all types and functions from native extension
6
18
  end
Binary file