scrapedo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +13 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/LICENSE.txt +21 -0
- data/README.md +38 -0
- data/Rakefile +16 -0
- data/lib/scrapedo/config/google_builder/country_restrict.yml +384 -0
- data/lib/scrapedo/config/google_builder/domain.yml +86 -0
- data/lib/scrapedo/config/google_builder/geo_location.yml +720 -0
- data/lib/scrapedo/config/google_builder/host_language.yml +469 -0
- data/lib/scrapedo/config/google_builder/language_restrict.yml +107 -0
- data/lib/scrapedo/config/google_builder/time_period.yml +7 -0
- data/lib/scrapedo/google_builder.rb +187 -0
- data/lib/scrapedo/version.rb +5 -0
- data/lib/scrapedo.rb +12 -0
- data/scrapedo.gemspec +44 -0
- data/sig/scrapedo.rbs +66 -0
- metadata +71 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
require "net/http"
require "pathname"
require "yaml"
|
|
6
|
+
|
|
7
|
+
# Chainable builder for requests to the Scrape.do Google Search plugin.
#
# Setter-style methods store one request parameter in an internal hash and
# return +self+ so calls can be chained. #start and #next perform the HTTP
# request and return the parsed JSON response.
#
# Keys found in the YAML lookup tables under CONFIG_PATH are exposed as
# zero-argument shortcut methods through #method_missing.
class GoogleBuilder
  # Directory holding the YAML lookup tables listed in FILE_MAP.
  CONFIG_PATH = Pathname.new(__dir__ || ".").join("config/google_builder").expand_path

  # Request parameter name => YAML file whose keys become shortcut methods.
  FILE_MAP = {
    time_period: "time_period.yml",
    hl: "host_language.yml",
    lr: "language_restrict.yml",
    gl: "geo_location.yml",
    cr: "country_restrict.yml",
    google_domain: "domain.yml"
  }.freeze

  # Device names accepted by #device.
  DEVICES = %w[desktop mobile].freeze
  # Parameters that #params must never overwrite.
  PROTECTED_PARAMS = %i[token start].freeze
  private_constant :DEVICES, :PROTECTED_PARAMS

  # @param token [String] API token sent as the +token+ query parameter
  # @param all_params [Boolean] when true the +:all+ section of each YAML
  #   table is used for shortcut methods, otherwise the +:common+ section
  def initialize(token, all_params: false)
    @has_next = false
    @data = { token: token }
    @params_cache = {}
    @all_params = all_params
  end

  # @return [Boolean] whether the last response advertised a next page
  def next?
    @has_next
  end

  # Starts a new search: drops every stored parameter except the token,
  # resets the pagination flag and stores the query.
  #
  # @param query [String] search terms
  # @return [GoogleBuilder] self
  # @raise [ArgumentError] when the query is nil or only whitespace
  def search(query)
    raise ArgumentError, "Query is required" if blank? query

    @data.select! { |key, _value| key == :token }
    @has_next = false
    @data[:q] = query
    self
  end

  # Stores the device type; values other than "desktop"/"mobile" are ignored.
  # Symbols are accepted as well, matching what #desktop and #mobile store.
  #
  # @param device [String, Symbol]
  # @return [GoogleBuilder] self
  def device(device)
    @data[:device] = device if DEVICES.include?(device.to_s)
    self
  end

  # Sets +include_html+ to true.
  # @return [GoogleBuilder] self
  def include_html
    @data[:include_html] = true
    self
  end

  # Stores the +hl+ parameter.
  # @return [GoogleBuilder] self
  def hl(language)
    @data[:hl] = language
    self
  end
  alias host_language hl

  # Stores the +gl+ parameter.
  # @return [GoogleBuilder] self
  def gl(location)
    @data[:gl] = location
    self
  end
  alias geo_location gl

  # Stores the +google_domain+ parameter.
  # @return [GoogleBuilder] self
  def google_domain(domain)
    @data[:google_domain] = domain
    self
  end
  alias domain google_domain

  # Stores the +location+ parameter.
  # @return [GoogleBuilder] self
  def location(location)
    @data[:location] = location
    self
  end

  # Stores the +uule+ parameter.
  # @return [GoogleBuilder] self
  def uule(uule)
    @data[:uule] = uule
    self
  end

  # Stores the +lr+ parameter.
  # @return [GoogleBuilder] self
  def lr(language)
    @data[:lr] = language
    self
  end
  alias language_restrict lr

  # Stores the +cr+ parameter.
  # @return [GoogleBuilder] self
  def cr(country)
    @data[:cr] = country
    self
  end
  alias country_restrict cr

  # Sets +safe+ to "active".
  # @return [GoogleBuilder] self
  def safe_search
    @data[:safe] = "active"
    self
  end

  # Sets +nfpr+ to true.
  # @return [GoogleBuilder] self
  def enable_nfpr
    @data[:nfpr] = true
    self
  end

  # Sets +filter+ to false.
  # @return [GoogleBuilder] self
  def disable_filter
    @data[:filter] = false
    self
  end

  # Performs the request and returns the parsed JSON body.
  #
  # The offset is rounded down to a multiple of 10; non-positive values
  # become 0. The pagination flag is updated from the response.
  #
  # @param start [Integer] result offset
  # @return [Object] whatever JSON.parse yields for the response body
  # @raise [ArgumentError] when no query has been set via #search
  def start(start = 0)
    raise ArgumentError, "Query is required" if blank? @data[:q]

    @data[:start] = start.positive? ? start / 10 * 10 : 0
    result = JSON.parse(Net::HTTP.get(scrapedo_url))
    # A payload without a "pagination" hash (e.g. an error document) must not
    # blow up here; it simply means there is no next page.
    pagination = result.is_a?(Hash) ? result["pagination"] : nil
    @has_next = pagination.is_a?(Hash) && !pagination["next"].nil?
    result
  end

  # Fetches the following page (current offset + 10).
  #
  # @return [Object, nil] parsed response, or nil when #next? is false
  def next
    start(@data[:start] + 10) if next?
  end

  # #desktop and #mobile: zero-argument shortcuts that set the device.
  %i[desktop mobile].each do |name|
    define_method(name) do
      @data[:device] = name
      self
    end
  end

  # Bulk-assigns parameters from the first hash given. +token+ and +start+
  # are never overwritten and nil values are skipped. Calling it without an
  # argument (or with nil) is a no-op.
  #
  # @param params [Array<Hash>] only the first element is read
  # @return [GoogleBuilder] self
  def params(*params)
    (params.first || {}).each do |key, value|
      key_sym = key.to_sym
      next if value.nil? || PROTECTED_PARAMS.include?(key_sym)

      @data[key_sym] = value
    end
    self
  end

  # @return [URI] endpoint URL with the stored parameters as query string
  def scrapedo_url
    url = URI("https://api.scrape.do/plugin/google/search")
    url.query = URI.encode_www_form(@data)
    url
  end

  # Turns a key found in one of the YAML tables into a shortcut method: the
  # method is defined on this instance on first use, then invoked.
  def method_missing(name, *args, &block)
    source_type, config_data = find_config_source(name)
    if source_type
      define_singleton_method(name) do
        @data[source_type] = config_data[name]
        self
      end
      return send(name)
    end
    super
  end

  # Mirrors #method_missing: true for any key present in the YAML tables.
  def respond_to_missing?(name, include_private = false)
    # Plain Ruby has no Object#present?; test the lookup result directly.
    !find_config_source(name).nil? || super
  end

  # Human-readable summary of the instance variables, leaving out the YAML
  # cache. NOTE: @data is printed as-is, so the API token appears in it.
  def inspect
    vars = instance_variables.filter_map do |var|
      next if var == :@params_cache

      "#{var}=#{instance_variable_get(var).inspect}"
    end.join(", ")
    "#<#{self.class}:0x#{object_id.to_s(16)} #{vars}>"
  end

  private

  # Loads (and memoizes per instance) the relevant section of a YAML table.
  #
  # @param filename [String] file name relative to CONFIG_PATH
  # @return [Hash] the +:all+ or +:common+ section; empty when it is missing
  def load_yaml(filename)
    @params_cache[filename] ||= begin
      section = @all_params ? :all : :common
      document = YAML.safe_load_file(CONFIG_PATH.join(filename), symbolize_names: true, aliases: true)
      (document.is_a?(Hash) && document[section]) || {}
    end
  end

  # Finds the first table (in FILE_MAP order) that contains +key+.
  #
  # @param key [Symbol, String]
  # @return [Array(Symbol, Hash), nil] parameter name and table, or nil
  def find_config_source(key)
    key = key.to_sym # tables are loaded with symbolized keys
    FILE_MAP.each do |type, filename|
      data = load_yaml(filename)
      return [type, data] if data.key?(key)
    end
    nil
  end

  # @return [Boolean] true for nil and for strings that are empty after strip
  def blank?(value)
    value.nil? || (value.respond_to?(:strip) && value.strip.empty?)
  end
end
|
data/lib/scrapedo.rb
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "scrapedo/version"
|
|
4
|
+
require_relative "scrapedo/google_builder"
|
|
5
|
+
|
|
6
|
+
# Gem namespace; exposes factory methods for the request builders.
module Scrapedo
  # Creates a Google Search request builder bound to +token+.
  #
  # @param token [String] Scrape.do API token
  # @return [GoogleBuilder]
  # @raise [ArgumentError] when the token is nil, empty or only whitespace
  def self.google(token)
    # to_s keeps the check safe for non-String tokens; strip rejects
    # whitespace-only values the same way the builder rejects blank queries.
    raise ArgumentError, "Token is required" if token.to_s.strip.empty?

    GoogleBuilder.new(token)
  end
end
|
data/scrapedo.gemspec
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/scrapedo/version"
|
|
4
|
+
|
|
5
|
+
# Gem specification for scrapedo: identity, registry metadata and the list of
# packaged files.
Gem::Specification.new do |spec|
  # Identity
  spec.name = "scrapedo"
  spec.version = Scrapedo::VERSION
  spec.authors = ["hypersport"]
  spec.email = ["boss.yuan@outlook.com"]

  # Description
  spec.summary = "Ruby Gem For Scrape.do"
  spec.description = "An unofficial Ruby gem designed to provide a convenient interface for Ruby developers. It eliminates the need to handle complex HTTP requests and parsing logic, allowing you to easily access data from the Scrapedo API (currently supports only Google Search API) within your applications."
  spec.homepage = "https://github.com/hypersport/scrapedo"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0.0"

  # Registry metadata (key order kept as in the published gem).
  spec.metadata.update(
    "allowed_push_host" => "https://rubygems.org",
    "homepage_uri" => spec.homepage,
    "source_code_uri" => "https://github.com/hypersport/scrapedo",
    "changelog_uri" => "https://github.com/hypersport/scrapedo/blob/main/CHANGELOG.md",
    "bug_tracker_uri" => "https://github.com/hypersport/scrapedo/issues",
    "documentation_uri" => "https://rubydoc.info/github/hypersport/scrapedo",
    "yard.run" => "yri",
    "rubygems_mfa_required" => "true"
  )

  # Package every git-tracked file except this gemspec itself and paths that
  # only matter for development.
  excluded_prefixes = %w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile]
  spec.files = Dir.chdir(__dir__) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject do |path|
      File.expand_path(path) == __FILE__ || path.start_with?(*excluded_prefixes)
    end
  end

  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # No runtime dependencies are registered. To add one:
  # spec.add_dependency "example-gem", "~> 1.0"
  #
  # Gem authoring guide: https://bundler.io/guides/creating_gem.html
end
|
data/sig/scrapedo.rbs
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Gem namespace; exposes the factory for Google Search request builders.
module Scrapedo
  VERSION: String

  # Returns a builder bound to the given API token.
  # The implementation raises ArgumentError for a nil or empty token.
  def self.google: (String token) -> GoogleBuilder
end
|
|
5
|
+
|
|
6
|
+
# Chainable request builder for the Scrape.do Google Search plugin.
# Setter-style methods take one value and return the builder itself.
class GoogleBuilder
  CONFIG_PATH: Pathname
  FILE_MAP: Hash[Symbol, String]

  @has_next: bool
  @data: Hash[Symbol, untyped]
  @params_cache: Hash[String, Hash[Symbol, untyped]]
  @all_params: bool

  def initialize: (String token, ?all_params: bool) -> void

  def next?: () -> bool

  def search: (String query) -> GoogleBuilder

  def device: (String device) -> GoogleBuilder

  def desktop: () -> GoogleBuilder

  def mobile: () -> GoogleBuilder

  def include_html: () -> GoogleBuilder

  def hl: (String language) -> GoogleBuilder
  alias host_language hl

  def gl: (String location) -> GoogleBuilder
  alias geo_location gl

  def google_domain: (String domain) -> GoogleBuilder
  alias domain google_domain

  def location: (String location) -> GoogleBuilder

  def uule: (String uule) -> GoogleBuilder

  def lr: (String language) -> GoogleBuilder
  alias language_restrict lr

  def cr: (String country) -> GoogleBuilder
  alias country_restrict cr

  def safe_search: () -> GoogleBuilder

  def enable_nfpr: () -> GoogleBuilder

  def disable_filter: () -> GoogleBuilder

  # Performs the HTTP request; the result is the parsed JSON body, whose
  # values are not limited to strings.
  def start: (?Integer start) -> Hash[String, untyped]

  # Returns nil when no next page is available.
  def next: () -> Hash[String, untyped]?

  # Only the first hash argument is read by the implementation.
  def params: (*Hash[Symbol | String, untyped] params) -> GoogleBuilder

  def scrapedo_url: () -> URI::Generic

  # Keys of the YAML lookup tables are served as zero-argument shortcuts.
  def method_missing: (Symbol name, *untyped args) ?{ (*untyped) -> untyped } -> untyped

  def respond_to_missing?: (Symbol | String name, ?bool include_private) -> bool

  def inspect: () -> String

  private

  def load_yaml: (String filename) -> Hash[Symbol, untyped]

  def find_config_source: (Symbol | String key) -> [Symbol, Hash[Symbol, untyped]]?

  def blank?: (untyped value) -> bool
end
|
metadata
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: scrapedo
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- hypersport
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-03-29 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: An unofficial Ruby gem designed to provide a convenient interface for
|
|
14
|
+
Ruby developers. It eliminates the need to handle complex HTTP requests and parsing
|
|
15
|
+
logic, allowing you to easily access data from the Scrapedo API (currently supports
|
|
16
|
+
only Google Search API) within your applications.
|
|
17
|
+
email:
|
|
18
|
+
- boss.yuan@outlook.com
|
|
19
|
+
executables: []
|
|
20
|
+
extensions: []
|
|
21
|
+
extra_rdoc_files: []
|
|
22
|
+
files:
|
|
23
|
+
- ".rubocop.yml"
|
|
24
|
+
- CHANGELOG.md
|
|
25
|
+
- CODE_OF_CONDUCT.md
|
|
26
|
+
- LICENSE.txt
|
|
27
|
+
- README.md
|
|
28
|
+
- Rakefile
|
|
29
|
+
- lib/scrapedo.rb
|
|
30
|
+
- lib/scrapedo/config/google_builder/country_restrict.yml
|
|
31
|
+
- lib/scrapedo/config/google_builder/domain.yml
|
|
32
|
+
- lib/scrapedo/config/google_builder/geo_location.yml
|
|
33
|
+
- lib/scrapedo/config/google_builder/host_language.yml
|
|
34
|
+
- lib/scrapedo/config/google_builder/language_restrict.yml
|
|
35
|
+
- lib/scrapedo/config/google_builder/time_period.yml
|
|
36
|
+
- lib/scrapedo/google_builder.rb
|
|
37
|
+
- lib/scrapedo/version.rb
|
|
38
|
+
- scrapedo.gemspec
|
|
39
|
+
- sig/scrapedo.rbs
|
|
40
|
+
homepage: https://github.com/hypersport/scrapedo
|
|
41
|
+
licenses:
|
|
42
|
+
- MIT
|
|
43
|
+
metadata:
|
|
44
|
+
allowed_push_host: https://rubygems.org
|
|
45
|
+
homepage_uri: https://github.com/hypersport/scrapedo
|
|
46
|
+
source_code_uri: https://github.com/hypersport/scrapedo
|
|
47
|
+
changelog_uri: https://github.com/hypersport/scrapedo/blob/main/CHANGELOG.md
|
|
48
|
+
bug_tracker_uri: https://github.com/hypersport/scrapedo/issues
|
|
49
|
+
documentation_uri: https://rubydoc.info/github/hypersport/scrapedo
|
|
50
|
+
yard.run: yri
|
|
51
|
+
rubygems_mfa_required: 'true'
|
|
52
|
+
post_install_message:
|
|
53
|
+
rdoc_options: []
|
|
54
|
+
require_paths:
|
|
55
|
+
- lib
|
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: 3.0.0
|
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
62
|
+
requirements:
|
|
63
|
+
- - ">="
|
|
64
|
+
- !ruby/object:Gem::Version
|
|
65
|
+
version: '0'
|
|
66
|
+
requirements: []
|
|
67
|
+
rubygems_version: 3.4.19
|
|
68
|
+
signing_key:
|
|
69
|
+
specification_version: 4
|
|
70
|
+
summary: Ruby Gem For Scrape.do
|
|
71
|
+
test_files: []
|