openai-scraper 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/openai-scraper.rb +285 -0
  3. metadata +164 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3640e18019f73e33ea331aac0b8834e0de4cc49311b43b0d578b5d1428d55ce9
4
+ data.tar.gz: 8a6f66459cd9597285c638ecc11751e4bd20722f28dddd1da864e65e81e9b954
5
+ SHA512:
6
+ metadata.gz: dc8c94459de87a753402bfd8ce76877116d1565b328519c5a8a64701bb0b89bf6f5b6fa90b33f9aae79e4ef165b85451b0c12d9b1b8550f875078e729d7f735e
7
+ data.tar.gz: 4a20f94caff31aae40299caaa4e06859467719823ea233a8dda77d8226e64526cd25b9a513886519d98c13fcbc5cf4eae1dc6b6d878ec87e3bc71c7bf138fe96
data/lib/openai-scraper.rb ADDED
@@ -0,0 +1,285 @@
1
+ require 'nokogiri'
2
+ require 'mechanize'
3
+ require 'simple_cloud_logging'
4
+ require "openai"
5
+ require 'colorize'
6
+ require "io/console"
7
+
8
+ # require selenium
9
+ require 'selenium-webdriver'
10
+
11
+ =begin
12
+ def get_current_weather(location:, unit: "fahrenheit")
13
+ # use a weather api to fetch weather
14
+ { "temperature": 22, "unit": "celsius", "description": "Sunny" }
15
+ end
16
+
17
+ def wl(url)
18
+ BlackStack::OpenAIScraper.wl(url)
19
+ end
20
+ =end
21
+
22
module BlackStack
  # Console chatbot that forwards prompts to the OpenAI chat API and can
  # inline web content into a prompt through two commands:
  #
  #   \wt <url>  replaced by the visible text of the page
  #   \wl <url>  replaced by the list of links found on the page
  #
  # Typical usage: `set(openai_apikey: ...)`, then `init`, then `console(logger)`.
  # State (API key, OpenAI client, Selenium browser, prompt history) is kept in
  # module-level class variables, so there is a single shared session.
  module OpenAIScraper
    @@openai_apikey = nil
    @@client = nil
    @@browser = nil
    @@history = []

    # hints to show in the terminal
    # NOTE(review): "macr-generated" looks like a typo for "macro-generated";
    # left untouched here because it is a runtime string.
    HINT1 = "HINT: The text below is a macr-generated prompt.".yellow

    # name of the module
    NAME = 'OpenAI Scraper'

    # prompt shown in the console
    PROMPT = 'openai-scraper'

    # file where the prompt history is persisted between sessions
    HISTORY_FILE = './history.json'

    # defaults used by `response` when the caller does not override them
    DEFAULT_MODEL = 'gpt-3.5-turbo'
    DEFAULT_TEMPERATURE = 0.5

    # Matches a whitespace-delimited `\wl` or `\wt` token and, when present,
    # the whitespace-delimited URL that follows it.
    COMMAND_PATTERN = /(?<!\S)\\(wl|wt)(?!\S)(?:\s+(\S+))?/.freeze

    # Store configuration values.
    #
    # @param h [Hash] recognized key: `:openai_apikey`. The stored key is only
    #   replaced when the hash carries a truthy value for it.
    def self.set(h)
      @@openai_apikey = h[:openai_apikey] if h[:openai_apikey]
    end

    # Create the OpenAI client, start a Chrome browser through Selenium and
    # load the prompt history from HISTORY_FILE when that file exists.
    def self.init
      @@client = OpenAI::Client.new(access_token: @@openai_apikey)
      @@browser = Selenium::WebDriver.for :chrome
      @@history = load_history
    end

    # Close the browser (if one was started) and overwrite HISTORY_FILE with
    # the current history. The history is written even if closing the browser
    # raises, and calling this without a prior `init` is safe.
    def self.finalize
      @@browser&.quit
    ensure
      @@browser = nil
      File.write(HISTORY_FILE, @@history.to_json)
    end

    # Help text shown in the console.
    #
    # @return [String]
    def self.help
      <<~HELP
        OpenAI Chatbot, With the Ability to Take Information from the Web.

        List of Commands:

        - \\q: quit

        - \\h: show this help

        - \\wt <url>: download the web-page and pass the text content to the model for further reference.

        - \\wl <url>: download the web-page and pass the list of links to the model for further reference.

      HELP
    end

    # Expand any `\wl <url>` / `\wt <url>` commands found in `s` and send the
    # resulting prompt to the chat API as a single user message.
    #
    # The string passed in is never modified: the console stores that same
    # object in the history, so mutating it would persist scraped page content
    # into HISTORY_FILE.
    #
    # NOTE: an OpenAI function-calling variant (a `functions:` parameter
    # describing `wl`) was prototyped here and left disabled.
    #
    # @param s [String] raw user input
    # @param model [String] chat model name
    # @param temperature [Numeric] sampling temperature
    # @return [String, nil] content of the first choice returned by the API
    # @raise [ArgumentError] when a command is not followed by a URL
    # @raise [RuntimeError] when `init` was not called, or with the API's
    #   error message when the response carries one
    def self.response(s, model: DEFAULT_MODEL, temperature: DEFAULT_TEMPERATURE)
      raise 'OpenAI client not initialized; call BlackStack::OpenAIScraper.init first' if @@client.nil?

      prompt = expand_commands(s.to_s)
      response = @@client.chat(
        parameters: {
          model: model, # Required.
          temperature: temperature,
          messages: [
            { role: 'user', content: prompt }
          ] # Required.
        }
      )
      error = response.dig('error', 'message')
      raise error if error

      response.dig('choices', 0, 'message', 'content')
    end

    # Download the web page and extract all of its links.
    #
    # No explicit wait is performed after navigating; waits on
    # `document.readyState` and `jQuery.active` were prototyped and disabled.
    #
    # @param url [String]
    # @return [Array<Hash>] one `{ 'href' => ..., 'text' => ... }` per `<a>` element
    # @raise [ArgumentError] when `url` is blank
    def self.wl(url)
      visit(url)
      @@browser.find_elements(:tag_name, 'a').map do |link|
        { 'href' => link['href'], 'text' => link.text.to_s.strip }
      end
    end

    # Download the web page and extract its text.
    #
    # @param url [String]
    # @return [String] text of the page's `<body>` element
    # @raise [ArgumentError] when `url` is blank
    def self.wt(url)
      visit(url)
      @@browser.find_element(:tag_name, 'body').text
    end

    # Show the prompt and process user input until the user quits.
    #
    # `\q` quits, `\h` prints the help, anything else is sent to `response`.
    # Ctrl+C discards the line being typed; blank lines are ignored. A failed
    # request is reported and the console keeps running, so the browser is
    # still closed and the history still saved when the user eventually quits.
    #
    # @param l [#logs, #logf, #log, nil] logger; a BlackStack::DummyLogger is
    #   used when nil
    def self.console(l = nil)
      l ||= BlackStack::DummyLogger.new(nil)
      loop do
        print "#{PROMPT}> ".blue
        line = read_line
        # nil means the line was cancelled with Ctrl+C
        next if line.nil? || line.strip.empty?

        @@history << line
        case line
        when '\q'
          # raises SystemExit, handled below so that `finalize` always runs
          exit(0)
        when '\h'
          puts help
        else
          begin
            # standard openai prompt
            puts response(line).to_s.green
          rescue StandardError => e
            report_error(e)
          end
        end
      end
    rescue SignalException, SystemExit
      l.logs 'Finalizing... '
      finalize
      l.logf 'done'.green

      l.log 'Bye!'
      exit(0)
    rescue StandardError => e
      # errors outside a request (e.g. reading from the terminal) end the console
      report_error(e)
    end

    # Read HISTORY_FILE; keep the current history when the file is missing,
    # is not valid JSON, or does not contain an array.
    def self.load_history
      return @@history unless File.exist?(HISTORY_FILE)

      parsed = JSON.parse(File.read(HISTORY_FILE))
      parsed.is_a?(Array) ? parsed.grep(String) : @@history
    rescue JSON::ParserError
      @@history
    end
    private_class_method :load_history

    # Point the shared browser at `url`, validating both beforehand.
    def self.visit(url)
      raise ArgumentError, 'url is required' if url.to_s.strip.empty?
      raise 'browser not initialized; call BlackStack::OpenAIScraper.init first' if @@browser.nil?

      @@browser.navigate.to url
    end
    private_class_method :visit

    # Return a copy of `text` with every `\wl <url>` / `\wt <url>` replaced by
    # the scraped content. The block form of gsub is used so that backslashes
    # in the scraped content are inserted literally.
    def self.expand_commands(text)
      text.gsub(COMMAND_PATTERN) do
        command = Regexp.last_match(1)
        url = Regexp.last_match(2)
        raise ArgumentError, "missing URL after \\#{command}" if url.nil?

        (command == 'wl' ? wl(url) : wt(url)).to_s
      end
    end
    private_class_method :expand_commands

    # Read one line from the terminal, char by char, echoing it and handling
    # backspace and the up/down arrows. Returns the typed line, or nil when
    # the user cancelled it with Ctrl+C.
    def self.read_line
      line = ''
      loop do
        c = $stdin.getch
        case c
        when nil
          # getch returned nothing to read (end of input): quit through the normal path
          exit(0)
        when "\n", "\r"
          puts
          return line
        when "\u007F", "\b"
          # backspace: remove the last char, if any
          next if line.empty?

          line = line[0..-2]
          print "\b \b"
        when "\u0003"
          # ctrl+c: reset the prompt
          puts
          return nil
        when "\e"
          # escape sequence; the arrow keys send two more chars
          line = recall_history(line, [$stdin.getch, $stdin.getch].join)
        else
          line += c
          print c
        end
      end
    end
    private_class_method :read_line

    # Handle an arrow key: `[A` (up) recalls the newest history entry and moves
    # it to the front; `[B` (down) recalls the oldest entry and moves it to the
    # back. The line on screen is replaced by the recalled entry. Any other
    # sequence, or an empty history, leaves the current line as it is.
    def self.recall_history(current, sequence)
      return current if @@history.empty?

      case sequence
      when '[A'
        @@history.rotate!(-1)
        recalled = @@history.first
      when '[B'
        @@history.rotate!(1)
        recalled = @@history.last
      else
        return current
      end

      # erase what is on screen, then print exactly what is kept in the buffer
      print "\b \b" * current.size
      print recalled
      recalled
    end
    private_class_method :recall_history

    # Print an error in red, using `to_console` when the exception provides it.
    def self.report_error(e)
      detail = e.respond_to?(:to_console) ? e.to_console : e.message
      puts "Error: #{detail.red}".red
    end
    private_class_method :report_error
  end # module OpenAIScraper
end # module BlackStack
metadata ADDED
@@ -0,0 +1,164 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: openai-scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: '1.2'
5
+ platform: ruby
6
+ authors:
7
+ - Leandro Daniel Sardi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-07-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.13.10
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.13.10
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 1.13.10
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.13.10
33
+ - !ruby/object:Gem::Dependency
34
+ name: simple_cloud_logging
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 1.2.2
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.2.2
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 1.2.2
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.2.2
53
+ - !ruby/object:Gem::Dependency
54
+ name: colorize
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: 0.8.1
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 0.8.1
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: 0.8.1
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 0.8.1
73
+ - !ruby/object:Gem::Dependency
74
+ name: selenium-webdriver
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: 4.10.0
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 4.10.0
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 4.10.0
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 4.10.0
93
+ - !ruby/object:Gem::Dependency
94
+ name: ruby-openai
95
+ requirement: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: 4.2.0
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 4.2.0
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: 4.2.0
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 4.2.0
113
+ - !ruby/object:Gem::Dependency
114
+ name: io-console
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: 0.5.11
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: 0.5.11
123
+ type: :runtime
124
+ prerelease: false
125
+ version_requirements: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - "~>"
128
+ - !ruby/object:Gem::Version
129
+ version: 0.5.11
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: 0.5.11
133
+ description: 'Find documentation here: https://github.com/leandrosardi/openai-scraper'
134
+ email: leandro@connectionsphere.com
135
+ executables: []
136
+ extensions: []
137
+ extra_rdoc_files: []
138
+ files:
139
+ - lib/openai-scraper.rb
140
+ homepage: https://rubygems.org/gems/openai-scraper
141
+ licenses:
142
+ - MIT
143
+ metadata: {}
144
+ post_install_message:
145
+ rdoc_options: []
146
+ require_paths:
147
+ - lib
148
+ required_ruby_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ required_rubygems_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - ">="
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ requirements: []
159
+ rubygems_version: 3.3.7
160
+ signing_key:
161
+ specification_version: 4
162
+ summary: Ruby library for automation operation on the GMass Deliverability Test and
163
+ Spam Checker.
164
+ test_files: []