webshaker 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e700f078decefd7d40b3469a5ae290c0746481a8d13f214c4020805b10660f20
4
- data.tar.gz: 00cbe24e7355ba81e2d54dece6a1e2272e6aae8db52f929eae13053a1d5082b2
3
+ metadata.gz: ec1a0256ab5ff877a2b20e7ca35a38dda6d0bfad0381e1af9babdacbe164a6f9
4
+ data.tar.gz: 580cfbdf80f08e8da800867d10e1811c171aebdc290b9360831cd46a071ffd57
5
5
  SHA512:
6
- metadata.gz: 91082a99565064c4ad876d1b175024bfc5dc2a6291d16f2f22eb9613903eba3e34da16e196d0823e10547eaec9b2b9948c57835894e50d4adeaf18daa7adf2a5
7
- data.tar.gz: 9d477bbd2420dd3a3b8c3938e26edd0a2c4e0f911743b41094d6991fae54931e2c768469cb3cb7e38b34ec84ef5fd04e54b02c40f522635c9aa6ffdd0710b51c
6
+ metadata.gz: 93ef3de4bf2e26f92850408083acd86c65e3033aaa51276ac280bf86e1bbf7ce5d5644f38e663647bea76ca10c9ea07a170a309ac1ba3dd1fe5ec2245c2107ab
7
+ data.tar.gz: a47e4a3105206654cdfcdc9c28f3d10710ed2d14f346daf92e43639eefe9c9523eb9e7a69984af2ca0f79e175881a8303a9e4252ea9963d1149ce1d75c210714
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- webshaker (0.0.5)
4
+ webshaker (0.0.6)
5
5
  nokogiri
6
6
  ruby-openai
7
7
  selenium-webdriver (~> 4.0)
data/lib/webshaker/ai.rb CHANGED
@@ -1,15 +1,18 @@
1
1
  module Webshaker
2
2
  class Ai
3
- attr_reader :html_content
3
+ attr_reader :html_content, :status_update
4
4
 
5
- def initialize(html_content)
5
+ def initialize(html_content, status_update: ->(status) {})
6
6
  @html_content = html_content
7
+ @status_update = status_update
7
8
  end
8
9
 
9
- def analyze(with_prompt:, respond_with: :text, temperature: 0.8, full_response: false)
10
+ def analyze(with_prompt:, model: Webshaker.config.model, respond_with: :text, temperature: 0.8, full_response: false)
11
+ status_update.call(:ai_start)
12
+
10
13
  response = ai_client.chat(
11
14
  parameters: {
12
- model: Webshaker.config.model,
15
+ model:,
13
16
  messages: messages(with_prompt).concat((respond_with.to_sym == :json) ? [{role: "user", content: "respond with json"}] : []),
14
17
  temperature:
15
18
  }.merge(
@@ -18,10 +21,15 @@ module Webshaker
18
21
  )
19
22
 
20
23
  # Return full response from the ai client if the respond_with is set to :full
21
- return response if full_response
24
+ if full_response
25
+ status_update.call(:ai_done)
26
+ return response
27
+ end
22
28
 
23
29
  response = response["choices"][0]["message"]["content"]
24
30
  response = JSON.parse(response) if respond_with.to_sym === :json
31
+
32
+ status_update.call(:ai_done)
25
33
  response
26
34
  end
27
35
 
@@ -4,11 +4,15 @@ require "nokogiri"
4
4
 
5
5
  module Webshaker
6
6
  class Scraper
7
- attr_reader :url, :driver, :options
7
+ attr_reader :url, :driver, :options, :status_update
8
8
 
9
- def initialize(url, options = {})
9
+ def initialize(url, options = {}, status_update: ->(status) {})
10
10
  @url = url
11
11
  @options = options
12
+ @status_update = status_update
13
+
14
+ status_update.call(:scrape_init)
15
+
12
16
  @driver = Selenium::WebDriver.for(
13
17
  :chrome,
14
18
  options: Selenium::WebDriver::Chrome::Options.new.tap(&method(:configure))
@@ -16,14 +20,19 @@ module Webshaker
16
20
  end
17
21
 
18
22
  def scrape
23
+ status_update.call(:scrape_start)
24
+
19
25
  driver.navigate.to url
20
26
 
21
27
  do_wait
22
28
 
23
29
  screenshot = driver.screenshot_as :base64
24
- html_content = driver.page_source
30
+ html_content = clean_up(driver.page_source)
25
31
  driver.quit
26
- ScrapeResult.new(screenshot, clean_up(html_content))
32
+
33
+ status_update.call(:scrape_done)
34
+
35
+ ScrapeResult.new(screenshot, html_content)
27
36
  end
28
37
 
29
38
  def self.scrape(url, options = {})
@@ -44,6 +53,8 @@ module Webshaker
44
53
 
45
54
  wait = Selenium::WebDriver::Wait.new(timeout: 10) # Waits a maximum of 10 seconds
46
55
 
56
+ status_update.call(:scrape_waiting)
57
+
47
58
  wait.until do
48
59
  if wait_until.is_a?(Proc)
49
60
  wait_until.call(driver)
@@ -56,6 +67,7 @@ module Webshaker
56
67
 
57
68
  def clean_up(html_content)
58
69
  # Parse the HTML content
70
+ status_update.call(:scrape_cleaning)
59
71
 
60
72
  doc = Nokogiri::HTML5(html_content)
61
73
 
@@ -2,11 +2,12 @@ require "openai"
2
2
 
3
3
  module Webshaker
4
4
  class Shaker
5
- attr_reader :url, :scrape_options
5
+ attr_reader :url, :scrape_options, :status_update
6
6
 
7
- def initialize(url, scrape_options = {})
7
+ def initialize(url, scrape_options = {}, status_update: ->(status) {})
8
8
  @url = url
9
9
  @scrape_options = scrape_options
10
+ @status_update = status_update
10
11
  end
11
12
 
12
13
  def shake(with_prompt:, respond_with: :text, temperature: 0.8)
@@ -16,11 +17,11 @@ module Webshaker
16
17
  private
17
18
 
18
19
  def html
19
- @html ||= Webshaker::Scraper.new(url, scrape_options).scrape.html
20
+ @html ||= Webshaker::Scraper.new(url, scrape_options, status_update:).scrape.html
20
21
  end
21
22
 
22
23
  def ai
23
- @ai ||= Webshaker::Ai.new(html)
24
+ @ai ||= Webshaker::Ai.new(html, status_update:)
24
25
  end
25
26
  end
26
27
  end
@@ -1,3 +1,3 @@
1
1
  module Webshaker
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webshaker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Mochetti
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-16 00:00:00.000000000 Z
11
+ date: 2024-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: selenium-webdriver