openai-scraper 1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/openai-scraper.rb +285 -0
- metadata +164 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3640e18019f73e33ea331aac0b8834e0de4cc49311b43b0d578b5d1428d55ce9
|
4
|
+
data.tar.gz: 8a6f66459cd9597285c638ecc11751e4bd20722f28dddd1da864e65e81e9b954
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: dc8c94459de87a753402bfd8ce76877116d1565b328519c5a8a64701bb0b89bf6f5b6fa90b33f9aae79e4ef165b85451b0c12d9b1b8550f875078e729d7f735e
|
7
|
+
data.tar.gz: 4a20f94caff31aae40299caaa4e06859467719823ea233a8dda77d8226e64526cd25b9a513886519d98c13fcbc5cf4eae1dc6b6d878ec87e3bc71c7bf138fe96
|
@@ -0,0 +1,285 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'mechanize'
|
3
|
+
require 'simple_cloud_logging'
|
4
|
+
require "openai"
|
5
|
+
require 'colorize'
|
6
|
+
require "io/console"
|
7
|
+
|
8
|
+
# require selenium
|
9
|
+
require 'selenium-webdriver'
|
10
|
+
|
11
|
+
=begin
|
12
|
+
def get_current_weather(location:, unit: "fahrenheit")
|
13
|
+
# use a weather api to fetch weather
|
14
|
+
{ "temperature": 22, "unit": "celsius", "description": "Sunny" }
|
15
|
+
end
|
16
|
+
|
17
|
+
def wl(url)
|
18
|
+
BlackStack::OpenAIScraper.wl(url)
|
19
|
+
end
|
20
|
+
=end
|
21
|
+
|
22
|
+
module BlackStack
|
23
|
+
module OpenAIScraper
|
24
|
+
# Module-level state shared by every method of the module.
@@openai_apikey = nil # API key assigned by `set`, read by `init`
@@client = nil        # OpenAI client created by `init`
@@browser = nil       # Selenium browser created by `init`, closed by `finalize`
@@history = []        # prompts typed in the console; loaded by `init`, saved by `finalize`

# hints to show in the terminal
HINT1 = "HINT: The text below is a macro-generated prompt.".yellow.freeze

# name of the module
NAME = 'OpenAI Scraper'.freeze

# prompt shown in the console
PROMPT = 'openai-scraper'.freeze
|
37
|
+
|
38
|
+
# Configure the module.
#
# @param h [Hash] options; only the `:openai_apikey` key is read. A missing or
#   falsy value leaves the current key untouched.
def self.set(h)
  apikey = h[:openai_apikey]
  @@openai_apikey = apikey if apikey
end
|
41
|
+
|
42
|
+
# Create the OpenAI client and the Chrome browser, then restore the prompt
# history from ./history.json when that file exists.
def self.init
  @@client = OpenAI::Client.new(access_token: @@openai_apikey)
  @@browser = Selenium::WebDriver.for(:chrome)

  history_file = './history.json'
  if File.exist?(history_file)
    @@history = JSON.parse(File.read(history_file))
  end
end
|
48
|
+
|
49
|
+
# Close the browser and persist the prompt history.
#
# Safe to call when `init` never ran (no browser yet) and when it is called
# more than once. The history is written even if closing the browser fails;
# in that case the browser error is still raised after the file is saved.
#
# @return [Integer] number of bytes written to ./history.json
def self.finalize
  written = nil
  begin
    # the browser is nil until `init` has created it
    @@browser&.quit
  ensure
    # forget the browser so that a second call does not quit it again
    @@browser = nil
    # overwrite the file ./history.json with the current history array
    written = File.write('./history.json', @@history.to_json)
  end
  written
end
|
54
|
+
|
55
|
+
# help shown in the console
|
56
|
+
# Build the help text printed by the console for the `\h` command.
#
# @return [String] the title and one entry per console command; every line is
#   followed by an empty line.
def self.help
  lines = [
    'OpenAI Chatbot, With the Ability to Take Information from the Web.',
    'List of Commands:',
    '- \q: quit',
    '- \h: show this help',
    '- \wt <url>: download the web-page and pass the text content to the model for further reference.',
    '- \wl <url>: download the web-page and pass the list of links to the model for further reference.'
  ]
  lines.map { |line| "#{line}\n\n" }.join
end
|
64
|
+
|
65
|
+
# Expand the scraping commands embedded in a prompt and send the result to the
# chat model.
#
# Every `\wl <url>` found in `s` is replaced by the list of links returned by
# `wl(url)`, and every `\wt <url>` by the page text returned by `wt(url)`.
# A command may sit at any position in the string; a command that is the last
# word (no URL after it) is left as typed.
#
# @param s [String] the text typed by the user; it is not modified
# @return [String, nil] the content of the first choice of the model's reply
# @raise [RuntimeError] with the API's message when the reply carries an error
def self.response(s)
  # Work on a copy: the console keeps `s` in its history, so the typed text
  # must not be rewritten in place with the scraped content.
  prompt = s.dup

  # Commands are looked up in what the user typed only, never in the scraped
  # content that has already been substituted into the prompt.
  tokens = s.split(' ')

  { '\wl' => :wl, '\wt' => :wt }.each do |command, scraper|
    urls = tokens.each_cons(2).select { |token, _| token == command }.map(&:last).uniq
    urls.each do |url|
      content = BlackStack::OpenAIScraper.public_send(scraper, url).to_s
      # The URL is escaped because `?`, `.`, `+`... are regex metacharacters;
      # the lookahead prevents matching a URL that is only a prefix of a longer one.
      pattern = /#{Regexp.escape(command)}\s+#{Regexp.escape(url)}(?!\S)/
      # Block form: the content is inserted verbatim, backslashes included.
      prompt.gsub!(pattern) { content }
    end
  end

  # NOTE: the experimental `functions:` definitions (function calling for `wl`
  # and `get_current_weather`) were commented out and are not sent.
  response = @@client.chat(
    parameters: {
      model: "gpt-3.5-turbo", # Required.
      temperature: 0.5,
      messages: [
        { role: "user", content: prompt }
      ] # Required.
    }
  )

  error_message = response.dig("error", "message")
  raise error_message if error_message

  response.dig("choices", 0, "message", "content")
end
|
144
|
+
|
145
|
+
# download the web page, and extract all links.
|
146
|
+
#
|
147
|
+
# Open `url` in the browser and collect every anchor of the page.
#
# No explicit wait for page load or pending ajax calls is performed.
#
# @param url [String] address of the page to visit
# @return [Array<Hash>] one hash per `<a>` element, with the keys 'href' and
#   'text' (the anchor text, stripped)
def self.wl(url)
  @@browser.navigate.to(url)

  anchors = @@browser.find_elements(:tag_name, 'a')
  anchors.map do |anchor|
    label = anchor.text.to_s.strip
    { 'href' => anchor['href'], 'text' => label }
  end
end # def wl
|
174
|
+
|
175
|
+
# download the web page, and extract the text.
|
176
|
+
#
|
177
|
+
# Open `url` in the browser and return the text of the page.
#
# No explicit wait for page load or pending ajax calls is performed.
#
# @param url [String] address of the page to visit
# @return [String] the text of the `<body>` element
def self.wt(url)
  @@browser.navigate.to(url)
  body = @@browser.find_element(:tag_name, 'body')
  body.text
end # def wt
|
192
|
+
|
193
|
+
# show the prompt and wait for the user input
|
194
|
+
# Read one line from the terminal, key by key (internal helper of `console`).
#
# Keys handled: Enter (finish the line), Backspace (erase the last character),
# Ctrl+C (cancel the line), arrow up / arrow down (recall a prompt from the
# history, rotating the history as it goes). Any other key is appended to the
# line and echoed.
#
# @return [String, nil] the typed line, or nil when it was cancelled with Ctrl+C
def self.read_console_line
  s = ''
  loop do
    c = $stdin.getch
    case c
    when nil
      # end of input: no key will ever arrive, so leave instead of looping forever
      exit(0)
    when "\n", "\r"
      puts
      # store a copy, so later changes to the returned string never alter the history
      @@history << s.dup unless s.strip.empty?
      return s
    when "\u007F", "\b"
      unless s.empty?
        s = s[0..-2]
        print "\b \b"
      end
    when "\u0003"
      # ctrl+c: drop whatever was typed and start a fresh prompt
      puts
      return nil
    when "\e"
      # arrow keys arrive as ESC followed by two more characters
      d = [$stdin.getch, $stdin.getch].join
      next if @@history.empty?
      if d == "[A"
        # remove the current prompt from the screen
        print "\b \b" * s.size
        # move the last prompt of the history to the front, and show it
        @@history.unshift(@@history.pop)
        s = @@history.first.dup
        print s.strip
      elsif d == "[B"
        # remove the current prompt from the screen
        print "\b \b" * s.size
        # move the first prompt of the history to the end, and show it
        @@history.push(@@history.shift)
        s = @@history.last.dup
        print s.strip
      end
    else
      s += c
      print c
    end
  end
end

# Interactive console: show the prompt, read a line, print the model's answer.
#
# `\q` quits, `\h` prints the help, anything else is sent to `response`.
# A cancelled (Ctrl+C) or blank line is ignored. A failed request prints the
# error and the console keeps running. On exit or on a signal the module is
# finalized (browser closed, history saved) and the process exits with 0.
#
# @param l [#logs, #logf, #log, nil] logger; nil falls back to a
#   BlackStack::DummyLogger
def self.console(l)
  l = BlackStack::DummyLogger.new(nil) if l.nil?
  begin
    loop do
      print "#{BlackStack::OpenAIScraper::PROMPT}> ".blue

      s = read_console_line
      # nothing to send: the line was cancelled, or only Enter was pressed
      next if s.nil? || s.strip.empty?

      if s == '\q'
        # `\q` to quit; SystemExit is rescued below to finalize first
        exit(0)
      elsif s == '\h'
        # `\h` for help
        puts BlackStack::OpenAIScraper.help
        next
      end

      # standard openai prompt; one failed request must not end the session
      begin
        puts BlackStack::OpenAIScraper.response(s).to_s.green
      rescue => e
        puts "Error: #{e.to_console.red}".red
      end
    end # loop
  rescue SignalException, SystemExit, Interrupt
    l.logs "Finalizing... "
    BlackStack::OpenAIScraper.finalize
    l.logf "done".green

    l.log 'Bye!'
    exit(0)
  rescue => e
    puts "Error: #{e.to_console.red}".red
  end # begin
end # def console
|
283
|
+
|
284
|
+
end # module OpenAIScraper
|
285
|
+
end # module BlackStack
|
metadata
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: openai-scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '1.2'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Leandro Daniel Sardi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-07-16 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.13.10
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.13.10
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.13.10
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.13.10
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: simple_cloud_logging
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 1.2.2
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.2.2
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.2.2
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.2.2
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: colorize
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 0.8.1
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 0.8.1
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.8.1
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 0.8.1
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: selenium-webdriver
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 4.10.0
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 4.10.0
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 4.10.0
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 4.10.0
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: ruby-openai
|
95
|
+
requirement: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 4.2.0
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 4.2.0
|
103
|
+
type: :runtime
|
104
|
+
prerelease: false
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 4.2.0
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 4.2.0
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: io-console
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: 0.5.11
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 0.5.11
|
123
|
+
type: :runtime
|
124
|
+
prerelease: false
|
125
|
+
version_requirements: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: 0.5.11
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 0.5.11
|
133
|
+
description: 'Find documentation here: https://github.com/leandrosardi/openai-scraper'
|
134
|
+
email: leandro@connectionsphere.com
|
135
|
+
executables: []
|
136
|
+
extensions: []
|
137
|
+
extra_rdoc_files: []
|
138
|
+
files:
|
139
|
+
- lib/openai-scraper.rb
|
140
|
+
homepage: https://rubygems.org/gems/openai-scraper
|
141
|
+
licenses:
|
142
|
+
- MIT
|
143
|
+
metadata: {}
|
144
|
+
post_install_message:
|
145
|
+
rdoc_options: []
|
146
|
+
require_paths:
|
147
|
+
- lib
|
148
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
|
+
requirements:
|
155
|
+
- - ">="
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '0'
|
158
|
+
requirements: []
|
159
|
+
rubygems_version: 3.3.7
|
160
|
+
signing_key:
|
161
|
+
specification_version: 4
|
162
|
+
summary: Ruby library for automation operation on the GMass Deliverability Test and
|
163
|
+
Spam Checker.
|
164
|
+
test_files: []
|