openai-scraper 1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/openai-scraper.rb +285 -0
- metadata +164 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3640e18019f73e33ea331aac0b8834e0de4cc49311b43b0d578b5d1428d55ce9
|
4
|
+
data.tar.gz: 8a6f66459cd9597285c638ecc11751e4bd20722f28dddd1da864e65e81e9b954
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: dc8c94459de87a753402bfd8ce76877116d1565b328519c5a8a64701bb0b89bf6f5b6fa90b33f9aae79e4ef165b85451b0c12d9b1b8550f875078e729d7f735e
|
7
|
+
data.tar.gz: 4a20f94caff31aae40299caaa4e06859467719823ea233a8dda77d8226e64526cd25b9a513886519d98c13fcbc5cf4eae1dc6b6d878ec87e3bc71c7bf138fe96
|
@@ -0,0 +1,285 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'mechanize'
|
3
|
+
require 'simple_cloud_logging'
|
4
|
+
require "openai"
|
5
|
+
require 'colorize'
|
6
|
+
require "io/console"
|
7
|
+
|
8
|
+
# require selenium
|
9
|
+
require 'selenium-webdriver'
|
10
|
+
|
11
|
+
=begin
|
12
|
+
def get_current_weather(location:, unit: "fahrenheit")
|
13
|
+
# use a weather api to fetch weather
|
14
|
+
{ "temperature": 22, "unit": "celsius", "description": "Sunny" }
|
15
|
+
end
|
16
|
+
|
17
|
+
def wl(url)
|
18
|
+
BlackStack::OpenAIScraper.wl(url)
|
19
|
+
end
|
20
|
+
=end
|
21
|
+
|
22
|
+
module BlackStack
  # Interactive OpenAI chat console that can pull content from web pages.
  #
  # A prompt may embed the commands `\wl <url>` and `\wt <url>`; before the
  # prompt is sent to the model each command is replaced with, respectively,
  # the list of links or the visible text of the page at `<url>`, fetched with
  # a Selenium-driven Chrome browser.
  #
  # Typical usage: call `set`, then `init`, then `console`.
  module OpenAIScraper
    @@openai_apikey = nil
    @@client = nil
    @@browser = nil
    @@history = []

    # hints to show in the terminal
    HINT1 = "HINT: The text below is a macr-generated prompt.".yellow

    # name of the module
    NAME = 'OpenAI Scraper'

    # prompt shown in the console
    PROMPT = 'openai-scraper'

    # file where the prompt history is persisted between sessions
    HISTORY_FILE = './history.json'

    # a `\wl` or `\wt` command written as a standalone token, followed by its url
    COMMAND_PATTERN = /(?<!\S)\\(wl|wt)\s+(\S+)/

    # Store the configuration of the module.
    #
    # @param h [Hash] configuration; only the key `:openai_apikey` is read, and
    #   it is ignored when its value is nil or false.
    def self.set(h)
      @@openai_apikey = h[:openai_apikey] if h[:openai_apikey]
    end

    # Create the OpenAI client, start a Chrome browser, and load the prompt
    # history from `./history.json` if that file exists.
    def self.init
      @@client = OpenAI::Client.new(access_token: @@openai_apikey)
      @@browser = Selenium::WebDriver.for :chrome
      @@history = JSON.parse(File.read(HISTORY_FILE)) if File.exist?(HISTORY_FILE)
    end

    # Close the browser and overwrite `./history.json` with the current history.
    #
    # Safe to call when the browser was never started or was already closed.
    # The history is written even if closing the browser raises.
    def self.finalize
      @@browser&.quit
    ensure
      @@browser = nil
      File.write(HISTORY_FILE, @@history.to_json)
    end

    # Help text shown in the console.
    #
    # @return [String]
    def self.help
      <<~'HELP'
        OpenAI Chatbot, With the Ability to Take Information from the Web.

        List of Commands:
        - \q: quit
        - \h: show this help
        - \wt <url>: download the web-page and pass the text content to the model for further reference.
        - \wl <url>: download the web-page and pass the list of links to the model for further reference.
      HELP
    end

    # Send a prompt to the chat model and return its answer.
    #
    # Every `\wl <url>` / `\wt <url>` command found in `s` is expanded first.
    # `s` itself is never modified, so the string stored in the history keeps
    # the command as the user typed it.
    #
    # @param s [String] the prompt typed by the user.
    # @return [String, nil] content of the first choice returned by the API.
    # @raise [RuntimeError] with the API error message when the response has one.
    def self.response(s)
      prompt = expand_commands(s.to_s)

      # Function calling (the `functions:` parameter) is not used: commands are
      # expanded on this side before the request is made.
      response = @@client.chat(
        parameters: {
          model: "gpt-3.5-turbo", # Required.
          temperature: 0.5,
          messages: [
            { role: "user", content: prompt }
          ] # Required.
        }
      )

      error = response.dig("error", "message")
      raise error if error

      response.dig("choices", 0, "message", "content")
    end

    # Download the web page and extract all its links.
    #
    # No explicit wait is performed after navigating to the page.
    #
    # @param url [String] address of the page to visit.
    # @return [Array<Hash>] one hash per `<a>` element, with the keys 'href'
    #   and 'text' (the link text, stripped).
    def self.wl(url)
      @@browser.navigate.to url

      @@browser.find_elements(:tag_name, 'a').map do |link|
        { 'href' => link['href'], 'text' => link.text.to_s.strip }
      end
    end # def wl

    # Download the web page and extract its text.
    #
    # No explicit wait is performed after navigating to the page.
    #
    # @param url [String] address of the page to visit.
    # @return [String] text of the `<body>` element.
    def self.wt(url)
      @@browser.navigate.to url
      @@browser.find_element(:tag_name, 'body').text
    end # def wt

    # Show the prompt and process the user input until the user quits.
    #
    # `\q` quits and `\h` prints the help; any other non-blank line is sent to
    # the model and the answer is printed. An error raised while answering one
    # prompt is reported and the console keeps running.
    #
    # On exit (`\q` or a termination signal) the module is finalized and the
    # process exits with status 0.
    #
    # @param l [#logs, #logf, #log, nil] logger; a `BlackStack::DummyLogger`
    #   is used when nil.
    def self.console(l)
      l = BlackStack::DummyLogger.new(nil) if l.nil?
      begin
        loop do
          print "#{PROMPT}> ".blue

          s = read_prompt_line
          # nothing to do: the line was blank or it was cancelled with ctrl+c
          next if s.nil?

          case s
          when '\q' # `\q` to quit
            exit(0)
          when '\h' # `\h` for help
            puts help
          else
            # standard openai prompt
            begin
              puts response(s).to_s.green
            rescue => e
              # one failed request must not terminate the whole session
              puts "Error: #{e.to_console.red}".red
            end
          end
        end
      rescue SignalException, SystemExit, Interrupt
        l.logs "Finalizing... "
        finalize
        l.logf "done".green

        l.log 'Bye!'
        exit(0)
      rescue => e
        puts "Error: #{e.to_console.red}".red
      end # begin
    end # def console

    # Replace every `\wl <url>` / `\wt <url>` command in `text` with the
    # content downloaded from its url.
    #
    # The text is scanned once, so downloaded content is never scanned for
    # commands. The replacement is produced by a block, so backslashes in the
    # downloaded content are inserted literally.
    #
    # @param text [String]
    # @return [String] a new string; `text` is left untouched.
    def self.expand_commands(text)
      text.gsub(COMMAND_PATTERN) do
        command = Regexp.last_match(1)
        url = Regexp.last_match(2)
        command == 'wl' ? wl(url).to_s : wt(url).to_s
      end
    end

    # Read one line from the terminal, char by char.
    #
    # Handles backspace, ctrl+c (discard the line), and the arrow-up /
    # arrow-down keys, which cycle through the history by rotating it.
    # A non-blank line is appended to the history when the user presses enter.
    #
    # @return [String, nil] the line typed, or nil when it was blank or was
    #   discarded with ctrl+c.
    def self.read_prompt_line
      s = ''
      loop do
        c = $stdin.getch
        case c
        when "\n", "\r"
          # the user pressed enter: the line is complete
          puts
          return nil if s.strip.empty?

          @@history << s
          return s
        when "\u007F"
          # backspace: remove the last char from the string and from the screen
          next if s.empty?

          s = s[0..-2]
          print "\b \b"
        when "\u0003"
          # ctrl+c: reset the prompt
          puts
          return nil
        when "\e"
          # escape sequence: only arrow-up (`[A`) and arrow-down (`[B`) are handled
          d = [$stdin.getch, $stdin.getch].join
          next if @@history.empty?

          case d
          when '[A'
            # move the last prompt of the history to the front, and recall it
            @@history.rotate!(-1)
            recalled = @@history.first
          when '[B'
            # move the first prompt of the history to the back, and recall it
            @@history.rotate!(1)
            recalled = @@history.last
          else
            next
          end

          # remove the current prompt from the screen, then print the recalled one
          print "\b \b" * s.size
          s = recalled.to_s.dup
          print s
        else
          s += c
          print c
        end
      end
    end

    private_class_method :expand_commands, :read_prompt_line
  end # module OpenAIScraper
end # module BlackStack
|
metadata
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: openai-scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '1.2'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Leandro Daniel Sardi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-07-16 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.13.10
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.13.10
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.13.10
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.13.10
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: simple_cloud_logging
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 1.2.2
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.2.2
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.2.2
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.2.2
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: colorize
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 0.8.1
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 0.8.1
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.8.1
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 0.8.1
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: selenium-webdriver
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 4.10.0
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 4.10.0
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 4.10.0
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 4.10.0
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: ruby-openai
|
95
|
+
requirement: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 4.2.0
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 4.2.0
|
103
|
+
type: :runtime
|
104
|
+
prerelease: false
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 4.2.0
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 4.2.0
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: io-console
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: 0.5.11
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 0.5.11
|
123
|
+
type: :runtime
|
124
|
+
prerelease: false
|
125
|
+
version_requirements: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: 0.5.11
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 0.5.11
|
133
|
+
description: 'Find documentation here: https://github.com/leandrosardi/openai-scraper'
|
134
|
+
email: leandro@connectionsphere.com
|
135
|
+
executables: []
|
136
|
+
extensions: []
|
137
|
+
extra_rdoc_files: []
|
138
|
+
files:
|
139
|
+
- lib/openai-scraper.rb
|
140
|
+
homepage: https://rubygems.org/gems/openai-scraper
|
141
|
+
licenses:
|
142
|
+
- MIT
|
143
|
+
metadata: {}
|
144
|
+
post_install_message:
|
145
|
+
rdoc_options: []
|
146
|
+
require_paths:
|
147
|
+
- lib
|
148
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
|
+
requirements:
|
155
|
+
- - ">="
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '0'
|
158
|
+
requirements: []
|
159
|
+
rubygems_version: 3.3.7
|
160
|
+
signing_key:
|
161
|
+
specification_version: 4
|
162
|
+
summary: Ruby library for automation operation on the GMass Deliverability Test and
|
163
|
+
Spam Checker.
|
164
|
+
test_files: []
|