artvee_scraper 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b6975645d66e1f093ffb1a32bbc516039a25b3baeab5cc842c136bbc6a97911
4
- data.tar.gz: a5a112f811c7117970718d956020707927f8b611ef3c659fc2e19ed59507c893
3
+ metadata.gz: ad2436d422de3a69964170b72a3296a2f27062d05d395c217178ccac22c7fbed
4
+ data.tar.gz: d24d8a2d9533b58f9ccdfd8a92334f38950a5a49793cd837de42faa4a7fc0acb
5
5
  SHA512:
6
- metadata.gz: 331227e2dce6602d4518bb364f70e5e273e7185f1f4b4c408e5e8f4ac4e60b6596e38b3e445e2bdf86730bf6aa411b2c4bc518f974c5bc4832176e4ca726fa56
7
- data.tar.gz: 48936c5e55f8a103e056820469e5935424d7061fd9cfeb7b8ad96ed1c0eb3d2a562288bb43b10e45293f4c97e9ab2a28261c6547e0f6756585a7b6caefe41661
6
+ metadata.gz: b43f433cf1462e3dd90cab43d4c53e80190199bc8d8beba89c1f049c5a4f7ff52ca487e19680dea54427c5dfd6959bd8db8c7cc00a0309afbdee80681712a370
7
+ data.tar.gz: 1870b93e70763b2b1992e623a891a3f5bff5e4d187c5dc6934cb10d005047000533de2937f08a7b1903a7c7ae3eb4a25beb8a668021156b587461f8d33bde9b6
@@ -1,13 +1,13 @@
1
- # rubocop:disable Lint/MixedRegexpCaptureTypes
2
1
  # frozen_string_literal: true
3
2
 
4
- require 'open-uri'
5
- require 'nokogiri'
3
+ require_relative 'card'
4
+ require_relative 'http_fetcher'
6
5
 
6
+ # Scrapes art data from artvee.com
7
7
  class ArtveeScraper
8
8
  BASE_URL = 'https://artvee.com/'
9
9
  @arts = []
10
- @doc = Nokogiri::HTML(URI.open(BASE_URL))
10
+ @doc = ::Nokogiri::HTML(HttpFetcher.call(BASE_URL))
11
11
 
12
12
  class << self
13
13
  def scrape
@@ -18,55 +18,20 @@ class ArtveeScraper
18
18
  private
19
19
 
20
20
  def populate_arts
21
- @doc.search('.product-grid-item.product.woodmart-hover-tiled').each do |card|
22
- @arts << {
23
- img_url: big_pic_url(card.at('img').attributes['src'].value),
24
- title: title(card.at('h3').text),
25
- date: date(card.at('h3').text),
26
- artist: card.at('.woodmart-product-brands-links a')&.text,
27
- artist_details: artist_details(card.at('.woodmart-product-brands-links').text),
28
- tag: card.at('.woodmart-product-cats a')&.text
29
- }
21
+ @doc.search('.product-grid-item.product.woodmart-hover-tiled').each do |obj|
22
+ @arts << art_hash(Card.new(obj))
30
23
  end
31
24
  end
32
25
 
33
- def big_pic_url(original_url)
34
- original_url.sub(/ftmp/, 'sftb')
35
- end
36
-
37
- def title(h3_text)
38
- h3_text[..-2].match(/^(?<title>.+?)\s*(\((?<date>[^)]+)\))?$/)[:title]
39
- end
40
-
41
- def date(h3_text)
42
- h3_text[..-2].match(/^(?<title>.+?)\s*(\((?<date>[^)]+)\))?$/)[:date]
43
- end
44
-
45
- def artist_details(div_text)
46
- return {} if div_text.split('(').count < 2
47
-
48
- @details = div_text.split('(')[1][0..-2].split(', ')
49
- author_life_cycle.merge(nationality)
50
- end
51
-
52
- def author_life_cycle
53
- return { birth_date: @details.first } if @details.count == 1
54
- return { birth_date: @details.last } if @details.last.delete(' ').split(/-|–/).count == 1
55
-
56
- life_cycle_hash(@details.last.delete(' ').split(/-|–/))
57
- end
58
-
59
- def life_cycle_hash(life_cycle)
26
+ def art_hash(card)
60
27
  {
61
- birth_date: life_cycle.first,
62
- passing_date: life_cycle.last
28
+ img_url: card.img_url,
29
+ title: card.title,
30
+ date: card.date,
31
+ artist: card.artist,
32
+ artist_details: card.artist_details,
33
+ tag: card.tag
63
34
  }
64
35
  end
65
-
66
- def nationality
67
- return {} if @details.count == 1
68
-
69
- { nationality: @details.first }
70
- end
71
36
  end
72
37
  end
data/lib/card.rb ADDED
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
# Extracts the fields of one artwork card and exposes them through readers;
# ArtveeScraper builds each entry of its @arts array from a Card.
#
# The wrapped node only needs to respond to `at(css_selector)`. Matched nodes
# are read through `text` and, for the image, through `attributes['src'].value`.
class Card
  attr_reader :img_url, :title, :date, :artist, :artist_details, :tag

  # @param html_obj [#at] node wrapping a single product card
  def initialize(html_obj)
    @html_obj = html_obj
    set_img_url
    set_date_and_title
    set_artist
    set_artist_details
    set_tag
  end

  private

  # Reads the image src and replaces its first 'ftmp' occurrence with 'sftb'.
  def set_img_url
    img_src = @html_obj.at('img').attributes['src'].value
    @img_url = img_src.sub(/ftmp/, 'sftb')
  end

  # Splits the <h3> text into a title and an optional parenthesised date.
  # Both stay nil when the heading does not match (e.g. it is empty).
  def set_date_and_title
    # The final character of the heading text is dropped before matching.
    h3_text = @html_obj.at('h3').text[0..-2]
    date_title = h3_text.match(/^(?<title>.+?)\s*(\((?<date>[^)]+)\))?$/)
    @title = date_title && date_title[:title]
    @date = date_title && date_title[:date]
  end

  def set_artist
    @artist = @html_obj.at('.woodmart-product-brands-links a')&.text
  end

  # Builds the artist_details hash from the text between the first pair of
  # parentheses in the brand container, e.g. "(French, 1840-1926)".
  # Always assigns a Hash: {} when the container or the parentheses are absent.
  def set_artist_details
    container = @html_obj.at('.woodmart-product-brands-links')
    div_text = container ? container.text : ''
    # Capture what sits between '(' and ')' instead of chopping the last
    # character, so trailing whitespace after ')' cannot leak into the data.
    inside_parens = div_text[/\(([^)]*)\)/, 1].to_s

    @details = inside_parens.split(', ')
    @artist_details = author_life_cycle.merge(nationality)
  end

  def set_tag
    @tag = @html_obj.at('.woodmart-product-cats a')&.text
  end

  # @return [Hash] birth/passing dates taken from the last detail when it
  #   contains a digit; {} otherwise
  def author_life_cycle
    years = @details.last
    return {} unless years&.match?(/\d+/)

    # Accepts both a hyphen and an en dash between the two years.
    life_cycle_hash(years.delete(' ').split(/-|–/))
  end

  # @param life_cycle [Array<String>] one or two year strings
  # @return [Hash] passing_date is nil when only one year is present
  def life_cycle_hash(life_cycle)
    {
      birth_date: life_cycle[0],
      passing_date: life_cycle[1]
    }
  end

  # @return [Hash] the first detail as nationality when it starts with at
  #   least two ASCII letters; {} otherwise (including when there are no details)
  def nationality
    first_detail = @details.first
    return {} unless first_detail&.match?(/^[a-zA-Z]{2,}/)

    { nationality: first_detail }
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
require 'net/http' # provides Net::HTTP and URI, which this file uses

# Fetches the content of a given URL and returns its body only if the HTTP
# request receives a 200 response.
class HttpFetcher
  # Performs a single GET request; TLS is enabled when the URL scheme is https.
  #
  # @param url [String] absolute http(s) URL
  # @return [String, nil] the response body when the status code is '200';
  #   nil for every other status
  def self.call(url)
    uri = ::URI.parse(url)

    response = ::Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request(::Net::HTTP::Get.new(uri.request_uri))
    end

    response.body if response.code == '200'
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: artvee_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leon Siqueira
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-25 00:00:00.000000000 Z
11
+ date: 2023-07-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A gem that gets titles, dates, artist, image URLs, etc. and returns as
14
14
  a Hash
@@ -18,6 +18,8 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/artvee_scraper.rb
21
+ - lib/card.rb
22
+ - lib/http_fetcher.rb
21
23
  homepage: https://github.com/leon-siqueira/artvee-scraper
22
24
  licenses:
23
25
  - MIT
@@ -30,7 +32,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
30
32
  requirements:
31
33
  - - ">="
32
34
  - !ruby/object:Gem::Version
33
- version: '0'
35
+ version: '3.1'
34
36
  required_rubygems_version: !ruby/object:Gem::Requirement
35
37
  requirements:
36
38
  - - ">="