artvee_scraper 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b6975645d66e1f093ffb1a32bbc516039a25b3baeab5cc842c136bbc6a97911
4
- data.tar.gz: a5a112f811c7117970718d956020707927f8b611ef3c659fc2e19ed59507c893
3
+ metadata.gz: ad2436d422de3a69964170b72a3296a2f27062d05d395c217178ccac22c7fbed
4
+ data.tar.gz: d24d8a2d9533b58f9ccdfd8a92334f38950a5a49793cd837de42faa4a7fc0acb
5
5
  SHA512:
6
- metadata.gz: 331227e2dce6602d4518bb364f70e5e273e7185f1f4b4c408e5e8f4ac4e60b6596e38b3e445e2bdf86730bf6aa411b2c4bc518f974c5bc4832176e4ca726fa56
7
- data.tar.gz: 48936c5e55f8a103e056820469e5935424d7061fd9cfeb7b8ad96ed1c0eb3d2a562288bb43b10e45293f4c97e9ab2a28261c6547e0f6756585a7b6caefe41661
6
+ metadata.gz: b43f433cf1462e3dd90cab43d4c53e80190199bc8d8beba89c1f049c5a4f7ff52ca487e19680dea54427c5dfd6959bd8db8c7cc00a0309afbdee80681712a370
7
+ data.tar.gz: 1870b93e70763b2b1992e623a891a3f5bff5e4d187c5dc6934cb10d005047000533de2937f08a7b1903a7c7ae3eb4a25beb8a668021156b587461f8d33bde9b6
data/lib/artvee_scraper.rb CHANGED
@@ -1,13 +1,13 @@
1
- # rubocop:disable Lint/MixedRegexpCaptureTypes
2
1
  # frozen_string_literal: true
3
2
 
4
- require 'open-uri'
5
- require 'nokogiri'
3
+ require_relative 'card'
4
+ require_relative 'http_fetcher'
6
5
 
6
+ # Scrapes art data from artvee.com
7
7
  class ArtveeScraper
8
8
  BASE_URL = 'https://artvee.com/'
9
9
  @arts = []
10
- @doc = Nokogiri::HTML(URI.open(BASE_URL))
10
+ @doc = ::Nokogiri::HTML(HttpFetcher.call(BASE_URL))
11
11
 
12
12
  class << self
13
13
  def scrape
@@ -18,55 +18,20 @@ class ArtveeScraper
18
18
  private
19
19
 
20
20
  def populate_arts
21
- @doc.search('.product-grid-item.product.woodmart-hover-tiled').each do |card|
22
- @arts << {
23
- img_url: big_pic_url(card.at('img').attributes['src'].value),
24
- title: title(card.at('h3').text),
25
- date: date(card.at('h3').text),
26
- artist: card.at('.woodmart-product-brands-links a')&.text,
27
- artist_details: artist_details(card.at('.woodmart-product-brands-links').text),
28
- tag: card.at('.woodmart-product-cats a')&.text
29
- }
21
+ @doc.search('.product-grid-item.product.woodmart-hover-tiled').each do |obj|
22
+ @arts << art_hash(Card.new(obj))
30
23
  end
31
24
  end
32
25
 
33
- def big_pic_url(original_url)
34
- original_url.sub(/ftmp/, 'sftb')
35
- end
36
-
37
- def title(h3_text)
38
- h3_text[..-2].match(/^(?<title>.+?)\s*(\((?<date>[^)]+)\))?$/)[:title]
39
- end
40
-
41
- def date(h3_text)
42
- h3_text[..-2].match(/^(?<title>.+?)\s*(\((?<date>[^)]+)\))?$/)[:date]
43
- end
44
-
45
- def artist_details(div_text)
46
- return {} if div_text.split('(').count < 2
47
-
48
- @details = div_text.split('(')[1][0..-2].split(', ')
49
- author_life_cycle.merge(nationality)
50
- end
51
-
52
- def author_life_cycle
53
- return { birth_date: @details.first } if @details.count == 1
54
- return { birth_date: @details.last } if @details.last.delete(' ').split(/-|–/).count == 1
55
-
56
- life_cycle_hash(@details.last.delete(' ').split(/-|–/))
57
- end
58
-
59
- def life_cycle_hash(life_cycle)
26
+ def art_hash(card)
60
27
  {
61
- birth_date: life_cycle.first,
62
- passing_date: life_cycle.last
28
+ img_url: card.img_url,
29
+ title: card.title,
30
+ date: card.date,
31
+ artist: card.artist,
32
+ artist_details: card.artist_details,
33
+ tag: card.tag
63
34
  }
64
35
  end
65
-
66
- def nationality
67
- return {} if @details.count == 1
68
-
69
- { nationality: @details.first }
70
- end
71
36
  end
72
37
  end
data/lib/card.rb ADDED
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Helps the ArtveeScraper class to populate the @arts array
4
+ class Card
5
+ attr_reader :img_url, :title, :date, :artist, :artist_details, :tag
6
+
7
+ def initialize(html_obj)
8
+ @html_obj = html_obj
9
+ set_img_url
10
+ set_date_and_title
11
+ set_artist
12
+ set_artist_details
13
+ set_tag
14
+ end
15
+
16
+ private
17
+
18
+ def set_img_url
19
+ img_src = @html_obj.at('img').attributes['src'].value
20
+ @img_url = img_src.sub(/ftmp/, 'sftb')
21
+ end
22
+
23
+ def set_date_and_title
24
+ h3_text = @html_obj.at('h3').text[..-2]
25
+ date_title = h3_text.match(/^(?<title>.+?)\s*(\((?<date>[^)]+)\))?$/)
26
+ @title = date_title[:title]
27
+ @date = date_title[:date]
28
+ end
29
+
30
+ def set_artist
31
+ @artist = @html_obj.at('.woodmart-product-brands-links a')&.text
32
+ end
33
+
34
+ def set_artist_details
35
+ div_text = @html_obj.at('.woodmart-product-brands-links').text
36
+ return {} if div_text.split('(').count < 2
37
+
38
+ @details = div_text.split('(')[1][0..-2].split(', ')
39
+ @artist_details = author_life_cycle.merge(nationality)
40
+ end
41
+
42
+ def set_tag
43
+ @tag = @html_obj.at('.woodmart-product-cats a')&.text
44
+ end
45
+
46
+ def author_life_cycle
47
+ return {} unless @details.count >= 1 && @details.last.match?(/\d+/)
48
+
49
+ life_cycle_hash(@details.last.delete(' ').split(/-|–/))
50
+ end
51
+
52
+ def life_cycle_hash(life_cycle)
53
+ {
54
+ birth_date: life_cycle[0],
55
+ passing_date: life_cycle[1]
56
+ }
57
+ end
58
+
59
+ def nationality
60
+ return {} unless @details.first.match?(/^[a-zA-Z]{2,}/)
61
+
62
+ { nationality: @details.first }
63
+ end
64
+ end
data/lib/http_fetcher.rb ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Fetches the content of a given URL and returns its body only if the HTTP request receives a 200 response.
4
+ class HttpFetcher
5
+ def self.call(url)
6
+ uri = ::URI.parse(url)
7
+
8
+ response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
9
+ http.request(Net::HTTP::Get.new(uri.request_uri))
10
+ end
11
+
12
+ response.body if response.code == '200'
13
+ end
14
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: artvee_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leon Siqueira
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-25 00:00:00.000000000 Z
11
+ date: 2023-07-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A gem that gets titles, dates, artist, image URLs, etc. and returns as
14
14
  a Hash
@@ -18,6 +18,8 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/artvee_scraper.rb
21
+ - lib/card.rb
22
+ - lib/http_fetcher.rb
21
23
  homepage: https://github.com/leon-siqueira/artvee-scraper
22
24
  licenses:
23
25
  - MIT
@@ -30,7 +32,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
30
32
  requirements:
31
33
  - - ">="
32
34
  - !ruby/object:Gem::Version
33
- version: '0'
35
+ version: '3.1'
34
36
  required_rubygems_version: !ruby/object:Gem::Requirement
35
37
  requirements:
36
38
  - - ">="