artvee_scraper 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/artvee_scraper.rb +72 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7b6975645d66e1f093ffb1a32bbc516039a25b3baeab5cc842c136bbc6a97911
4
+ data.tar.gz: a5a112f811c7117970718d956020707927f8b611ef3c659fc2e19ed59507c893
5
+ SHA512:
6
+ metadata.gz: 331227e2dce6602d4518bb364f70e5e273e7185f1f4b4c408e5e8f4ac4e60b6596e38b3e445e2bdf86730bf6aa411b2c4bc518f974c5bc4832176e4ca726fa56
7
+ data.tar.gz: 48936c5e55f8a103e056820469e5935424d7061fd9cfeb7b8ad96ed1c0eb3d2a562288bb43b10e45293f4c97e9ab2a28261c6547e0f6756585a7b6caefe41661
data/lib/artvee_scraper.rb ADDED
@@ -0,0 +1,72 @@
1
+ # rubocop:disable Lint/MixedRegexpCaptureTypes
2
+ # frozen_string_literal: true
3
+
4
+ require 'open-uri'
5
+ require 'nokogiri'
6
+
7
# Scrapes the artwork cards found on the page at BASE_URL.
#
# The page is downloaded on the first call to {.scrape} rather than when this
# file is required, and the parsed document is memoized for later calls.
class ArtveeScraper
  BASE_URL = 'https://artvee.com/'
  # CSS selector matching one artwork card.
  CARD_SELECTOR = '.product-grid-item.product.woodmart-hover-tiled'
  # Splits a heading such as "Some Title (1900)" into a title and an optional
  # parenthesised date.
  HEADING_PATTERN = /^(?<title>.+?)\s*(?:\((?<date>[^)]+)\))?$/
  # Hyphen or en dash separating the two ends of a life span ("1840-1926").
  LIFE_SPAN_SEPARATOR = /[-–]/

  class << self
    # Builds one Hash per artwork card in the document.
    #
    # A fresh Array is returned on every call, so calling this repeatedly does
    # not accumulate duplicate entries.
    #
    # @return [Array<Hash>] hashes with the keys :img_url, :title, :date,
    #   :artist, :artist_details and :tag
    # @raise [StandardError] whatever URI.open raises when the download fails
    def scrape
      document.search(CARD_SELECTOR).map { |card| build_art(card) }
    end

    private

    # Downloads and parses BASE_URL once. The block form of URI.open closes the
    # underlying IO as soon as parsing is done.
    def document
      @document ||= URI.open(BASE_URL) { |io| Nokogiri::HTML(io) }
    end

    # Converts a single card node into its Hash representation. Every lookup
    # is nil-safe so one incomplete card cannot abort the whole scrape.
    def build_art(card)
      image = card.at('img')
      heading = card.at('h3')&.text

      {
        img_url: big_pic_url(image && image['src']),
        title: title(heading),
        date: date(heading),
        artist: card.at('.woodmart-product-brands-links a')&.text,
        artist_details: artist_details(card.at('.woodmart-product-brands-links')&.text),
        tag: card.at('.woodmart-product-cats a')&.text
      }
    end

    # Replaces the first "ftmp" in the URL with "sftb".
    # NOTE(review): presumably this switches from the thumbnail path to the
    # full-size one; confirm against the site.
    #
    # @param original_url [String, nil]
    # @return [String, nil] nil when no URL was given
    def big_pic_url(original_url)
      original_url&.sub('ftmp', 'sftb')
    end

    # @param h3_text [String, nil] raw text of the card heading
    # @return [String, nil] the part before the optional "(date)" suffix
    def title(h3_text)
      heading_part(h3_text, :title)
    end

    # @param h3_text [String, nil] raw text of the card heading
    # @return [String, nil] the text inside the trailing parentheses, if any
    def date(h3_text)
      heading_part(h3_text, :date)
    end

    # Shared parser behind #title and #date. Returns nil instead of raising
    # when the heading is missing or does not match.
    def heading_part(h3_text, name)
      # The final character is dropped before matching.
      # NOTE(review): presumably a trailing whitespace character in the site's
      # markup; confirm.
      match = h3_text.to_s[..-2].match(HEADING_PATTERN)
      match && match[name]
    end

    # Parses the parenthesised part of text like "Name (French, 1840-1926)".
    #
    # @param div_text [String, nil]
    # @return [Hash] any of :birth_date, :passing_date and :nationality;
    #   empty when there are no parentheses or nothing inside them
    def artist_details(div_text)
      segments = div_text.to_s.split('(')
      return {} if segments.count < 2

      # Text after the first "(" minus its last character (the closing ")").
      details = segments[1][0..-2].split(', ')
      return {} if details.empty?

      author_life_cycle(details).merge(nationality(details))
    end

    # Derives the birth/passing dates from the last detail.
    # NOTE(review): a lone detail is stored whole under :birth_date, even when
    # it is a range or a nationality; confirm this is intended.
    def author_life_cycle(details)
      return { birth_date: details.first } if details.count == 1

      span = details.last.delete(' ').split(LIFE_SPAN_SEPARATOR)
      return { birth_date: details.last } if span.count == 1

      life_cycle_hash(span)
    end

    # @param life_cycle [Array<String>] the separated ends of a life span
    def life_cycle_hash(life_cycle)
      {
        birth_date: life_cycle.first,
        passing_date: life_cycle.last
      }
    end

    # The first detail is the nationality, but only when other details follow.
    def nationality(details)
      return {} if details.count == 1

      { nationality: details.first }
    end
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: artvee_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Leon Siqueira
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-05-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A gem that gets titles, dates, artist, image URLs, etc. and returns as
14
+ a Hash
15
+ email: leon.siqueir4@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/artvee_scraper.rb
21
+ homepage: https://github.com/leon-siqueira/artvee-scraper
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubygems_version: 3.3.24
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Get art data from artvee.com
44
+ test_files: []