scrapedo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +16 -0
- data/README.md +2 -0
- data/lib/scrapedo/google_builder.rb +108 -2
- data/lib/scrapedo/version.rb +1 -1
- data/lib/scrapedo.rb +9 -2
- data/scrapedo.gemspec +1 -1
- metadata +3 -3
- data/sig/scrapedo.rbs +0 -66
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 668a016977a4780253fc501ca7a95e080fbb4d8d5e19d6ba57feb8b427dbec18
|
|
4
|
+
data.tar.gz: 215c25a17b97fdc39bb2532cde698d885619d9b63b83adb0de124d9b9600dcd3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4ae653db01a38e250df1d5b4a5fd54aec1e14e8e0f57dd274cd632b4d71b9ee747598f2054e3431ba28a40b931d271b4ec7250ab5f62826d876960ca4ce80eaa
|
|
7
|
+
data.tar.gz: a326ae65643a30751d3423069a1f1ad5fa62cc8191717fe6da7e4d59b47270cf66b1f5287c58678d1a005459a022509f1c2076e3171efc1d82d6bfb6b2d98163
|
data/.rubocop.yml
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
AllCops:
|
|
2
2
|
TargetRubyVersion: 3.0
|
|
3
|
+
SuggestExtensions: false
|
|
3
4
|
|
|
4
5
|
Style/StringLiterals:
|
|
5
6
|
Enabled: true
|
|
@@ -11,3 +12,18 @@ Style/StringLiteralsInInterpolation:
|
|
|
11
12
|
|
|
12
13
|
Layout/LineLength:
|
|
13
14
|
Max: 120
|
|
15
|
+
AllowedPatterns:
|
|
16
|
+
- !ruby/regexp /\A\s*#/
|
|
17
|
+
Exclude:
|
|
18
|
+
- '*.gemspec'
|
|
19
|
+
|
|
20
|
+
Lint/AmbiguousBlockAssociation:
|
|
21
|
+
Description: This cop checks for ambiguous block association with method when param passed without parentheses.
|
|
22
|
+
Enabled: false
|
|
23
|
+
|
|
24
|
+
Style/Documentation:
|
|
25
|
+
Description: This cop checks for missing top-level documentation of classes and modules.
|
|
26
|
+
Enabled: false
|
|
27
|
+
|
|
28
|
+
Metrics/ClassLength:
|
|
29
|
+
Max: 150
|
data/README.md
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
|
|
6
6
|
An unofficial Ruby gem designed to provide a convenient interface for Ruby developers. It eliminates the need to handle complex HTTP requests and parsing logic, allowing you to easily access data from the Scrapedo API (currently supports only Google Search API) within your applications.
|
|
7
7
|
|
|
8
|
+
It supports **ALL** parameters of the [Google Search API](https://scrape.do/documentation/google-search-api/search/).
|
|
9
|
+
|
|
8
10
|
## Installation
|
|
9
11
|
|
|
10
12
|
Install the gem and add to the application's Gemfile by executing:
|
|
data/lib/scrapedo/google_builder.rb
CHANGED
|
@@ -5,6 +5,12 @@ require "json"
|
|
|
5
5
|
require "yaml"
|
|
6
6
|
|
|
7
7
|
class GoogleBuilder
|
|
8
|
+
# @!method desktop
|
|
9
|
+
# Desktop SERP layout. See {General Parameters}[https://scrape.do/documentation/google-search-api/search/#general].
|
|
10
|
+
#
|
|
11
|
+
# @!method mobile
|
|
12
|
+
# Mobile SERP layout. See {General Parameters}[https://scrape.do/documentation/google-search-api/search/#general].
|
|
13
|
+
|
|
8
14
|
CONFIG_PATH = Pathname.new(__dir__ || ".").join("config/google_builder").expand_path
|
|
9
15
|
FILE_MAP = {
|
|
10
16
|
time_period: "time_period.yml",
|
|
@@ -15,6 +21,10 @@ class GoogleBuilder
|
|
|
15
21
|
google_domain: "domain.yml"
|
|
16
22
|
}.freeze
|
|
17
23
|
|
|
24
|
+
# Initializes a new instance of GoogleBuilder
|
|
25
|
+
#
|
|
26
|
+
# @param token [String]
|
|
27
|
+
# @param all_params [Boolean] If true, all available parameters will be included, otherwise only common parameters will be included.
|
|
18
28
|
def initialize(token, all_params: false)
|
|
19
29
|
@has_next = false
|
|
20
30
|
@data = { token: token }
|
|
@@ -22,10 +32,16 @@ class GoogleBuilder
|
|
|
22
32
|
@all_params = all_params
|
|
23
33
|
end
|
|
24
34
|
|
|
35
|
+
# Returns +true+ if the result has more pages; otherwise +false+.
|
|
25
36
|
def next?
|
|
26
37
|
@has_next
|
|
27
38
|
end
|
|
28
39
|
|
|
40
|
+
# Search query. See {Required Parameters}[https://scrape.do/documentation/google-search-api/search/#required].
|
|
41
|
+
#
|
|
42
|
+
# @note This is required!
|
|
43
|
+
# @param query [String]
|
|
44
|
+
# @raise [ArgumentError] if query is nil or empty.
|
|
29
45
|
def search(query)
|
|
30
46
|
raise ArgumentError, "Query is required" if blank? query
|
|
31
47
|
|
|
@@ -35,71 +51,154 @@ class GoogleBuilder
|
|
|
35
51
|
self
|
|
36
52
|
end
|
|
37
53
|
|
|
54
|
+
# Device type for SERP layout. See {General Parameters}[https://scrape.do/documentation/google-search-api/search/#general].
|
|
55
|
+
# Accepted values: desktop, mobile, default is desktop.
|
|
56
|
+
#
|
|
57
|
+
# This can also use GoogleBuilder#{device}. For example:
|
|
58
|
+
# - GoogleBuilder#device("desktop") is same as GoogleBuilder#desktop.
|
|
59
|
+
# - GoogleBuilder#device("mobile") is same as GoogleBuilder#mobile.
|
|
60
|
+
#
|
|
61
|
+
# @param device [String]
|
|
38
62
|
def device(device)
|
|
39
63
|
@data[:device] = device if %w[desktop mobile].include?(device)
|
|
40
64
|
self
|
|
41
65
|
end
|
|
42
66
|
|
|
67
|
+
# Result includes the raw Google HTML. Useful for debugging and custom parsing.
|
|
68
|
+
# See {General Parameters}[https://scrape.do/documentation/google-search-api/search/#general].
|
|
43
69
|
def include_html
|
|
44
70
|
@data[:include_html] = true
|
|
45
71
|
self
|
|
46
72
|
end
|
|
47
73
|
|
|
74
|
+
# Host language of the Google UI. Default is en.
|
|
75
|
+
# Supports 150+ languages. ISO 639-1 codes. See {Language Parameter}[https://scrape.do/documentation/google-search-api/localization/#language-parameter-hl].
|
|
76
|
+
#
|
|
77
|
+
# This can also use GoogleBuilder#hl_{language}. For example:
|
|
78
|
+
# - GoogleBuilder#hl("en") is same as GoogleBuilder#hl_en and GoogleBuilder#hl_english.
|
|
79
|
+
# - GoogleBuilder#hl("ach") is same as GoogleBuilder#hl_ach and GoogleBuilder#hl_luo. (For this one, all_params must be true)
|
|
80
|
+
#
|
|
81
|
+
# GoogleBuilder#host_language is an alias for GoogleBuilder#hl.
|
|
82
|
+
#
|
|
83
|
+
# @param language [String]
|
|
48
84
|
def hl(language)
|
|
49
85
|
@data[:hl] = language
|
|
50
86
|
self
|
|
51
87
|
end
|
|
52
88
|
alias host_language hl
|
|
53
89
|
|
|
90
|
+
# Geo Location (datacenter country). Determines from which country's perspective results are ranked and returned. Default is us.
|
|
91
|
+
# Supports 240+ countries. ISO 3166-1 alpha-2 codes. See {Country Parameter}[https://scrape.do/documentation/google-search-api/localization/#country-parameter-gl].
|
|
92
|
+
#
|
|
93
|
+
# This can also use GoogleBuilder#gl_{location}. For example:
|
|
94
|
+
# - GoogleBuilder#gl("us") is same as GoogleBuilder#gl_us and GoogleBuilder#gl_united_states.
|
|
95
|
+
# - GoogleBuilder#gl("cl") is same as GoogleBuilder#gl_cl and GoogleBuilder#gl_chile. (For this one, all_params must be true)
|
|
96
|
+
#
|
|
97
|
+
# GoogleBuilder#geo_location is an alias for GoogleBuilder#gl.
|
|
98
|
+
#
|
|
99
|
+
# @param location [String]
|
|
54
100
|
def gl(location)
|
|
55
101
|
@data[:gl] = location
|
|
56
102
|
self
|
|
57
103
|
end
|
|
58
104
|
alias geo_location gl
|
|
59
105
|
|
|
60
|
-
|
|
106
|
+
# Google domain to query. Prefixes [https://], [http://], and [www.] are automatically stripped. Default is google.com.
|
|
107
|
+
# Supports 84 regional domains. See {Supported Google Domains}[https://scrape.do/documentation/google-search-api/localization/#supported-google-domains].
|
|
108
|
+
#
|
|
109
|
+
# This can also use GoogleBuilder#domain_{location}. For example:
|
|
110
|
+
# - GoogleBuilder#domain("google.com") is same as GoogleBuilder#domain_united_states.
|
|
111
|
+
#
|
|
112
|
+
# GoogleBuilder#google_domain is an alias for GoogleBuilder#domain.
|
|
113
|
+
#
|
|
114
|
+
# @param domain [String]
|
|
115
|
+
def domain(domain)
|
|
61
116
|
@data[:google_domain] = domain
|
|
62
117
|
self
|
|
63
118
|
end
|
|
64
|
-
alias domain google_domain
|
|
119
|
+
alias google_domain domain
|
|
65
120
|
|
|
121
|
+
# Location name in Google's canonical format. Automatically encoded to UULE internally.
|
|
122
|
+
# See {Location}[https://scrape.do/documentation/google-search-api/localization/#location].
|
|
123
|
+
#
|
|
124
|
+
# Format: City,State/Region,Country
|
|
125
|
+
#
|
|
126
|
+
# Examples:
|
|
127
|
+
# [Istanbul,Istanbul,Turkey]
|
|
128
|
+
# [New York,New York,United States]
|
|
129
|
+
#
|
|
130
|
+
# @param location [String]
|
|
66
131
|
def location(location)
|
|
67
132
|
@data[:location] = location
|
|
68
133
|
self
|
|
69
134
|
end
|
|
70
135
|
|
|
136
|
+
# Google UULE-encoded location string.
|
|
137
|
+
# Auto-generated from location when not provided.
|
|
138
|
+
# If both location and uule are sent, uule takes priority. location is sufficient for most use cases.
|
|
139
|
+
# See {uule}[https://scrape.do/documentation/google-search-api/localization/#uule].
|
|
140
|
+
#
|
|
141
|
+
# @param uule [String]
|
|
71
142
|
def uule(uule)
|
|
72
143
|
@data[:uule] = uule
|
|
73
144
|
self
|
|
74
145
|
end
|
|
75
146
|
|
|
147
|
+
# Language Restrict applies strict language filtering. Only results written in the specified language are returned.
|
|
148
|
+
# Supports 35 languages. See {Language Restrict Parameter}[https://scrape.do/documentation/google-search-api/localization/#language-restrict-parameter-lr].
|
|
149
|
+
#
|
|
150
|
+
# This can also use GoogleBuilder#lr_{language}. For example:
|
|
151
|
+
# - GoogleBuilder#lr("en") is same as GoogleBuilder#lr_en and GoogleBuilder#lr_english.
|
|
152
|
+
#
|
|
153
|
+
# GoogleBuilder#language_restrict is an alias for GoogleBuilder#lr.
|
|
154
|
+
#
|
|
155
|
+
# @param language [String]
|
|
76
156
|
def lr(language)
|
|
77
157
|
@data[:lr] = language
|
|
78
158
|
self
|
|
79
159
|
end
|
|
80
160
|
alias language_restrict lr
|
|
81
161
|
|
|
162
|
+
# Country Restrict applies strict country filtering. Only results originating from the specified country are returned.
|
|
163
|
+
# Supports 240+ countries. See {Country Restrict Parameter}[https://scrape.do/documentation/google-search-api/localization/#country-restrict-parameter-cr].
|
|
164
|
+
#
|
|
165
|
+
# This can also use GoogleBuilder#cr_{country}. For example:
|
|
166
|
+
# - GoogleBuilder#cr("countryUS") is same as GoogleBuilder#cr_us and GoogleBuilder#cr_united_states.
|
|
167
|
+
# - GoogleBuilder#cr("countryAL") is same as GoogleBuilder#cr_al and GoogleBuilder#cr_albania. (For this one, all_params must be true)
|
|
168
|
+
#
|
|
169
|
+
# GoogleBuilder#country_restrict is an alias for GoogleBuilder#cr.
|
|
170
|
+
#
|
|
171
|
+
# @param country [String]
|
|
82
172
|
def cr(country)
|
|
83
173
|
@data[:cr] = country
|
|
84
174
|
self
|
|
85
175
|
end
|
|
86
176
|
alias country_restrict cr
|
|
87
177
|
|
|
178
|
+
# Sets the safe parameter to "active" to filter adult content from results.
|
|
88
179
|
def safe_search
|
|
89
180
|
@data[:safe] = "active"
|
|
90
181
|
self
|
|
91
182
|
end
|
|
92
183
|
|
|
184
|
+
# Enables Google's automatic spelling correction.
|
|
93
185
|
def enable_nfpr
|
|
94
186
|
@data[:nfpr] = true
|
|
95
187
|
self
|
|
96
188
|
end
|
|
97
189
|
|
|
190
|
+
# Disables "Similar Results" and "Omitted Results" filters.
|
|
98
191
|
def disable_filter
|
|
99
192
|
@data[:filter] = false
|
|
100
193
|
self
|
|
101
194
|
end
|
|
102
195
|
|
|
196
|
+
# Start scraping from a specific offset.
|
|
197
|
+
# See {General Parameters}[https://scrape.do/documentation/google-search-api/search/#general].
|
|
198
|
+
#
|
|
199
|
+
# @param start [Integer] The offset to start from. Default is 0.
|
|
200
|
+
# @raise [ArgumentError] If the search query is not set.
|
|
201
|
+
# @return [JSON] The search results starting from the specified offset.
|
|
103
202
|
def start(start = 0)
|
|
104
203
|
raise ArgumentError, "Query is required" if blank? @data[:q]
|
|
105
204
|
|
|
@@ -110,6 +209,9 @@ class GoogleBuilder
|
|
|
110
209
|
result
|
|
111
210
|
end
|
|
112
211
|
|
|
212
|
+
# Gets next page if result has next page.
|
|
213
|
+
#
|
|
214
|
+
# @return [JSON] The next page of results.
|
|
113
215
|
def next
|
|
114
216
|
start(@data[:start] + 10) if next?
|
|
115
217
|
end
|
|
@@ -121,6 +223,9 @@ class GoogleBuilder
|
|
|
121
223
|
end
|
|
122
224
|
end
|
|
123
225
|
|
|
226
|
+
# Set any parameter from {Google Search API}[https://scrape.do/documentation/google-search-api/search/].
|
|
227
|
+
#
|
|
228
|
+
# @param params [Hash] A hash of parameters to set.
|
|
124
229
|
def params(*params)
|
|
125
230
|
params.first.each do |key, value|
|
|
126
231
|
key_sym = key.to_sym
|
|
@@ -129,6 +234,7 @@ class GoogleBuilder
|
|
|
129
234
|
self
|
|
130
235
|
end
|
|
131
236
|
|
|
237
|
+
# Returns the URL to be used for the request.
|
|
132
238
|
def scrapedo_url
|
|
133
239
|
url = URI("https://api.scrape.do/plugin/google/search")
|
|
134
240
|
url.query = URI.encode_www_form(@data)
|
data/lib/scrapedo/version.rb
CHANGED
data/lib/scrapedo.rb
CHANGED
|
@@ -4,9 +4,16 @@ require_relative "scrapedo/version"
|
|
|
4
4
|
require_relative "scrapedo/google_builder"
|
|
5
5
|
|
|
6
6
|
module Scrapedo
|
|
7
|
-
|
|
7
|
+
# Generates Google Search Endpoint. See {Scrapedo API}[https://scrape.do/documentation/google-search-api/search/]
|
|
8
|
+
# @param token [String]
|
|
9
|
+
# @param all_params [Boolean] If true, all available parameters will be included, otherwise only common parameters will be included.
|
|
10
|
+
# @raise [ArgumentError] if token is nil or empty.
|
|
11
|
+
# @example
|
|
12
|
+
# Scrapedo.google("your_token")
|
|
13
|
+
#   Scrapedo.google("your_token", all_params: true)
|
|
14
|
+
def self.google(token, all_params: false)
|
|
8
15
|
raise ArgumentError, "Token is required" if token.nil? || token.empty?
|
|
9
16
|
|
|
10
|
-
GoogleBuilder.new(token)
|
|
17
|
+
GoogleBuilder.new(token, all_params: all_params)
|
|
11
18
|
end
|
|
12
19
|
end
|
data/scrapedo.gemspec
CHANGED
|
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
|
9
9
|
spec.email = ["boss.yuan@outlook.com"]
|
|
10
10
|
|
|
11
11
|
spec.summary = "Ruby Gem For Scrape.do"
|
|
12
|
-
spec.description = "An unofficial Ruby gem designed to provide a convenient interface for Ruby developers. It eliminates the need to handle complex HTTP requests and parsing logic, allowing you to easily access data from the Scrapedo API (currently supports only Google Search API) within your applications."
|
|
12
|
+
spec.description = "An unofficial Ruby gem designed to provide a convenient interface for Ruby developers. It eliminates the need to handle complex HTTP requests and parsing logic, allowing you to easily access data from the Scrapedo API (currently supports only Google Search API) within your applications. It supports all parameters of the [Google Search API](https://scrape.do/documentation/google-search-api/search/)."
|
|
13
13
|
spec.homepage = "https://github.com/hypersport/scrapedo"
|
|
14
14
|
spec.license = "MIT"
|
|
15
15
|
spec.required_ruby_version = ">= 3.0.0"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: scrapedo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- hypersport
|
|
@@ -13,7 +13,8 @@ dependencies: []
|
|
|
13
13
|
description: An unofficial Ruby gem designed to provide a convenient interface for
|
|
14
14
|
Ruby developers. It eliminates the need to handle complex HTTP requests and parsing
|
|
15
15
|
logic, allowing you to easily access data from the Scrapedo API (currently supports
|
|
16
|
-
only Google Search API) within your applications.
|
|
16
|
+
only Google Search API) within your applications. It supports all parameters of
|
|
17
|
+
the [Google Search API](https://scrape.do/documentation/google-search-api/search/).
|
|
17
18
|
email:
|
|
18
19
|
- boss.yuan@outlook.com
|
|
19
20
|
executables: []
|
|
@@ -36,7 +37,6 @@ files:
|
|
|
36
37
|
- lib/scrapedo/google_builder.rb
|
|
37
38
|
- lib/scrapedo/version.rb
|
|
38
39
|
- scrapedo.gemspec
|
|
39
|
-
- sig/scrapedo.rbs
|
|
40
40
|
homepage: https://github.com/hypersport/scrapedo
|
|
41
41
|
licenses:
|
|
42
42
|
- MIT
|
data/sig/scrapedo.rbs
DELETED
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
module Scrapedo
|
|
2
|
-
VERSION: String
|
|
3
|
-
def self.google: -> GoogleBuilder
|
|
4
|
-
end
|
|
5
|
-
|
|
6
|
-
class GoogleBuilder
|
|
7
|
-
CONFIG_PATH: Pathname
|
|
8
|
-
FILE_MAP: Hash[Symbol, String]
|
|
9
|
-
|
|
10
|
-
@has_next: bool
|
|
11
|
-
@data: Hash[Object, Object]
|
|
12
|
-
@params_cache: Hash[Object, Object]
|
|
13
|
-
@all_params: bool
|
|
14
|
-
|
|
15
|
-
def initialize: (String) -> void
|
|
16
|
-
|
|
17
|
-
def next?: () -> bool
|
|
18
|
-
|
|
19
|
-
def search: (String) -> GoogleBuilder
|
|
20
|
-
|
|
21
|
-
def device: -> GoogleBuilder
|
|
22
|
-
|
|
23
|
-
def include_html: -> GoogleBuilder
|
|
24
|
-
|
|
25
|
-
def hl: -> GoogleBuilder
|
|
26
|
-
alias host_language hl
|
|
27
|
-
|
|
28
|
-
def gl: -> GoogleBuilder
|
|
29
|
-
alias geo_location gl
|
|
30
|
-
|
|
31
|
-
def google_domain: -> GoogleBuilder
|
|
32
|
-
alias domain google_domain
|
|
33
|
-
|
|
34
|
-
def location: -> GoogleBuilder
|
|
35
|
-
|
|
36
|
-
def uule: -> GoogleBuilder
|
|
37
|
-
|
|
38
|
-
def lr: -> GoogleBuilder
|
|
39
|
-
alias language_restrict lr
|
|
40
|
-
|
|
41
|
-
def cr: -> GoogleBuilder
|
|
42
|
-
alias country_restrict cr
|
|
43
|
-
|
|
44
|
-
def safe_search: -> GoogleBuilder
|
|
45
|
-
|
|
46
|
-
def enable_nfpr: -> GoogleBuilder
|
|
47
|
-
|
|
48
|
-
def disable_filter: -> GoogleBuilder
|
|
49
|
-
|
|
50
|
-
def start: (Integer) -> Hash[String, String]
|
|
51
|
-
|
|
52
|
-
def next: -> Hash[String, String]
|
|
53
|
-
|
|
54
|
-
def params: -> GoogleBuilder
|
|
55
|
-
|
|
56
|
-
def scrapedo_url: -> URI::Generic
|
|
57
|
-
|
|
58
|
-
private
|
|
59
|
-
|
|
60
|
-
def load_yaml: (String) -> Hash[Object, String]
|
|
61
|
-
|
|
62
|
-
def find_config_source: (Symbol | String) -> [Symbol, Hash[Symbol, Object]]?
|
|
63
|
-
|
|
64
|
-
def blank?: (Object) -> bool
|
|
65
|
-
|
|
66
|
-
end
|