openai-scraper 1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/openai-scraper.rb +285 -0
  3. metadata +164 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3640e18019f73e33ea331aac0b8834e0de4cc49311b43b0d578b5d1428d55ce9
4
+ data.tar.gz: 8a6f66459cd9597285c638ecc11751e4bd20722f28dddd1da864e65e81e9b954
5
+ SHA512:
6
+ metadata.gz: dc8c94459de87a753402bfd8ce76877116d1565b328519c5a8a64701bb0b89bf6f5b6fa90b33f9aae79e4ef165b85451b0c12d9b1b8550f875078e729d7f735e
7
+ data.tar.gz: 4a20f94caff31aae40299caaa4e06859467719823ea233a8dda77d8226e64526cd25b9a513886519d98c13fcbc5cf4eae1dc6b6d878ec87e3bc71c7bf138fe96
@@ -0,0 +1,285 @@
1
+ require 'nokogiri'
2
+ require 'mechanize'
3
+ require 'simple_cloud_logging'
4
+ require "openai"
5
+ require 'colorize'
6
+ require "io/console"
7
+
8
+ # require selenium
9
+ require 'selenium-webdriver'
10
+
11
+ =begin
12
+ def get_current_weather(location:, unit: "fahrenheit")
13
+ # use a weather api to fetch weather
14
+ { "temperature": 22, "unit": "celsius", "description": "Sunny" }
15
+ end
16
+
17
+ def wl(url)
18
+ BlackStack::OpenAIScraper.wl(url)
19
+ end
20
+ =end
21
+
22
+ module BlackStack
23
+ module OpenAIScraper
24
+ @@openai_apikey = nil
25
+ @@client = nil
26
+ @@browser = nil
27
+ @@history = []
28
+
29
+ # hint banner (yellow) for the terminal; in the code visible here it is only referenced from commented-out `puts` calls
30
+ HINT1 = "HINT: The text below is a macr-generated prompt.".yellow
31
+
32
+ # human-readable name of the module
33
+ NAME = 'OpenAI Scraper'
34
+
35
+ # prompt label shown in the console (printed followed by `> `)
36
+ PROMPT = 'openai-scraper'
37
+
38
# Configure the module.
#
# @param h [Hash] settings; only the :openai_apikey entry is read.
#   A missing or falsy value leaves the stored key untouched.
# @return [Object, nil] the stored key, or nil when nothing was stored.
def self.set(h)
  apikey = h[:openai_apikey]
  return unless apikey

  @@openai_apikey = apikey
end
41
+
42
# Build the OpenAI client with the configured API key, start a Chrome
# session through Selenium, and restore the prompt history.
#
# The history is read from ./history.json (relative to the working
# directory) only when that file exists; otherwise it is left as is.
def self.init
  @@client = OpenAI::Client.new(access_token: @@openai_apikey)
  @@browser = Selenium::WebDriver.for(:chrome)

  history_path = './history.json'
  return unless File.exist?(history_path)

  @@history = JSON.parse(File.read(history_path))
end
48
+
49
# Close the browser session and persist the prompt history.
#
# The history is written to ./history.json (overwriting it) even when the
# browser was never started or when closing it raises; in the latter case
# the exception still propagates after the file has been written.
#
# @return [Integer] number of bytes written to the history file.
def self.finalize
  written = nil
  begin
    # `&.` because @@browser stays nil until `init` has run
    @@browser&.quit
  ensure
    # overwrite the file ./history.json with the current history array
    written = File.write('./history.json', @@history.to_json)
  end
  written
end
54
+
55
# help shown in the console
#
# @return [String] multi-line usage text listing every command the console
#   understands: `\q`, `\h`, `\wt <url>` and `\wl <url>`.
def self.help
  # Squiggly heredoc: one entry per line, no stray indentation and no
  # doubled line breaks. `\\` renders as a single backslash.
  <<~HELP
    OpenAI Chatbot, With the Ability to Take Information from the Web.

    List of Commands:
    - \\q: quit
    - \\h: show this help
    - \\wt <url>: download the web-page and pass the text content to the model for further reference.
    - \\wl <url>: download the web-page and pass the list of links to the model for further reference.
  HELP
end
64
+
65
# Expand the scraping macros embedded in a prompt and send the result to the
# OpenAI chat endpoint.
#
# Macros may appear anywhere in the text, delimited by whitespace:
# - `\wl <url>` is replaced by the list of links of the page (result of `wl`).
# - `\wt <url>` is replaced by the text of the page (result of `wt`).
# A macro that is not followed by a URL is left in the prompt untouched.
#
# @param s [String] raw user input; it is NOT modified.
# @return [Object] the content of the first choice in the API response.
# @raise [RuntimeError] with the API's message when the response carries an error.
def self.response(s)
  # Single, non-mutating pass:
  # - `gsub` (not `gsub!`) leaves the caller's string intact; the console
  #   stores that very object in its history;
  # - the URL is captured with `\S+` instead of being interpolated into a
  #   regexp, so characters such as `?`, `+` or `.` cannot alter the match;
  # - the block form inserts the scraped content literally (a replacement
  #   string would interpret backslash sequences as back-references);
  # - scraped content is never re-scanned for further macros.
  prompt = s.gsub(/(?<!\S)\\(wl|wt)\s+(\S+)/) do
    macro, url = Regexp.last_match.captures
    scraped = macro == 'wl' ? BlackStack::OpenAIScraper.wl(url) : BlackStack::OpenAIScraper.wt(url)
    scraped.to_s
  end

  # NOTE: function calling is not enabled; only the expanded user message is sent.
  response = @@client.chat(
    parameters: {
      model: "gpt-3.5-turbo", # Required.
      temperature: 0.5,
      messages: [
        { role: "user", content: prompt },
      ], # Required.
    }
  )

  error = response.dig("error", "message")
  raise error if error

  response.dig("choices", 0, "message", "content")
end
144
+
145
# Visit a web page with the shared browser and collect all its anchors.
#
# No explicit wait for page load or pending AJAX calls is performed here;
# the DOM is read right after `navigate.to` returns.
#
# @param url [String] address handed to the browser.
# @return [Array<Hash>] one hash per `<a>` element with the keys 'href'
#   and 'text' (the anchor text, stripped).
def self.wl(url)
  # visit the url
  @@browser.navigate.to(url)

  # turn every anchor into a small hash
  @@browser.find_elements(:tag_name, 'a').map do |anchor|
    label = anchor.text.to_s.strip
    { 'href' => anchor['href'], 'text' => label }
  end
end # def wl
174
+
175
# Visit a web page with the shared browser and return its text.
#
# No explicit wait for page load or pending AJAX calls is performed here;
# the DOM is read right after `navigate.to` returns.
#
# @param url [String] address handed to the browser.
# @return [String] text of the page's `<body>` element as reported by Selenium.
def self.wt(url)
  # visit the url
  @@browser.navigate.to(url)

  # the text of the webpage is the text of its body element
  page_body = @@browser.find_element(:tag_name, 'body')
  page_body.text
end # def wt
192
+
193
# Show the prompt and run the interactive loop until the user quits.
#
# Keys handled while typing: Enter submits the line, Backspace removes the
# last character, Ctrl+C discards the current line, Up/Down arrows rotate
# through the history. `\q` quits, `\h` prints the help, anything else is
# sent to `response` and the answer is printed in green.
#
# On quit (or on a signal) `finalize` is called and the process exits with
# status 0. A failed request is reported and the session continues; any
# other error is reported and ends the method.
#
# @param l [#logs, #logf, #log, nil] logger used while shutting down;
#   a BlackStack::DummyLogger is created when nil.
def self.console(l)
  l = BlackStack::DummyLogger.new(nil) if l.nil?
  begin
    loop do
      print "#{BlackStack::OpenAIScraper::PROMPT}> ".blue

      # get the user input, char by char
      s = ''
      cancelled = false
      loop do
        c = $stdin.getch
        case c
        when "\n", "\r"
          # enter: the line is complete
          puts
          break
        when "\u007F", "\b"
          # backspace: remove the last char from the string and the screen
          unless s.empty?
            s = s[0..-2]
            print "\b \b"
          end
        when "\u0003"
          # ctrl+c: discard what was typed and show a fresh prompt
          puts
          cancelled = true
          break
        when "\e"
          # escape sequence: the two chars that follow identify the key
          d = [$stdin.getch, $stdin.getch].join
          next if @@history.empty?

          if d == "[A"
            # arrow-up: erase the current line, recall the newest entry
            # and rotate it to the front of the history
            print "\b \b" * s.size
            s = @@history.pop
            @@history.unshift(s)
            # print exactly `s` so the next erase matches its length
            print s
          elsif d == "[B"
            # arrow-down: erase the current line, recall the oldest entry
            # and rotate it to the back of the history
            print "\b \b" * s.size
            s = @@history.shift
            @@history.push(s)
            print s
          end
        else
          s += c
          print c
        end
      end

      # nothing to submit: cancelled with ctrl+c, or a blank line
      next if cancelled || s.strip.empty?

      @@history << s

      case s
      when '\q'
        # `\q` to quit; SystemExit is handled below, where `finalize` runs
        exit(0)
      when '\h'
        # `\h` for help
        puts BlackStack::OpenAIScraper.help
        next
      end

      # standard openai prompt
      begin
        # hand over a copy: `response` may rewrite its argument in place
        # while expanding `\wl`/`\wt`, and `s` is now part of the history
        puts BlackStack::OpenAIScraper.response(s.dup).to_s.green
      rescue => e
        # a failed request must not end the session
        puts "Error: #{e.to_console.red}".red
      end
    end
  rescue SignalException, SystemExit, Interrupt
    l.logs "Finalizing... "
    BlackStack::OpenAIScraper.finalize
    l.logf "done".green

    l.log 'Bye!'
    exit(0)
  rescue => e
    puts "Error: #{e.to_console.red}".red
  end # begin
end # def console
283
+
284
+ end # module OpenAIScraper
285
+ end # module BlackStack
metadata ADDED
@@ -0,0 +1,164 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: openai-scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: '1.2'
5
+ platform: ruby
6
+ authors:
7
+ - Leandro Daniel Sardi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-07-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.13.10
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.13.10
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 1.13.10
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.13.10
33
+ - !ruby/object:Gem::Dependency
34
+ name: simple_cloud_logging
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 1.2.2
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.2.2
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 1.2.2
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.2.2
53
+ - !ruby/object:Gem::Dependency
54
+ name: colorize
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: 0.8.1
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 0.8.1
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: 0.8.1
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 0.8.1
73
+ - !ruby/object:Gem::Dependency
74
+ name: selenium-webdriver
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: 4.10.0
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 4.10.0
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 4.10.0
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 4.10.0
93
+ - !ruby/object:Gem::Dependency
94
+ name: ruby-openai
95
+ requirement: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: 4.2.0
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 4.2.0
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: 4.2.0
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 4.2.0
113
+ - !ruby/object:Gem::Dependency
114
+ name: io-console
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: 0.5.11
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: 0.5.11
123
+ type: :runtime
124
+ prerelease: false
125
+ version_requirements: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - "~>"
128
+ - !ruby/object:Gem::Version
129
+ version: 0.5.11
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: 0.5.11
133
+ description: 'Find documentation here: https://github.com/leandrosardi/openai-scraper'
134
+ email: leandro@connectionsphere.com
135
+ executables: []
136
+ extensions: []
137
+ extra_rdoc_files: []
138
+ files:
139
+ - lib/openai-scraper.rb
140
+ homepage: https://rubygems.org/gems/openai-scraper
141
+ licenses:
142
+ - MIT
143
+ metadata: {}
144
+ post_install_message:
145
+ rdoc_options: []
146
+ require_paths:
147
+ - lib
148
+ required_ruby_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ required_rubygems_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - ">="
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ requirements: []
159
+ rubygems_version: 3.3.7
160
+ signing_key:
161
+ specification_version: 4
162
+ summary: Ruby library for automation operation on the GMass Deliverability Test and
163
+ Spam Checker.
164
+ test_files: []