artvee_scraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/artvee_scraper.rb +72 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7b6975645d66e1f093ffb1a32bbc516039a25b3baeab5cc842c136bbc6a97911
4
+ data.tar.gz: a5a112f811c7117970718d956020707927f8b611ef3c659fc2e19ed59507c893
5
+ SHA512:
6
+ metadata.gz: 331227e2dce6602d4518bb364f70e5e273e7185f1f4b4c408e5e8f4ac4e60b6596e38b3e445e2bdf86730bf6aa411b2c4bc518f974c5bc4832176e4ca726fa56
7
+ data.tar.gz: 48936c5e55f8a103e056820469e5935424d7061fd9cfeb7b8ad96ed1c0eb3d2a562288bb43b10e45293f4c97e9ab2a28261c6547e0f6756585a7b6caefe41661
@@ -0,0 +1,72 @@
1
+ # rubocop:disable Lint/MixedRegexpCaptureTypes
2
+ # frozen_string_literal: true
3
+
4
+ require 'open-uri'
5
+ require 'nokogiri'
6
+
7
# Scrapes the artwork cards found on the page at BASE_URL.
#
# Every card becomes a Hash with the keys :img_url, :title, :date, :artist,
# :artist_details and :tag. A piece of information that is missing from a
# card is reported as nil (or as an empty Hash for :artist_details) instead
# of raising.
class ArtveeScraper
  BASE_URL = 'https://artvee.com/'

  # CSS selector matching one artwork card.
  CARD_SELECTOR = '.product-grid-item.product.woodmart-hover-tiled'
  # CSS selector of the element holding the artist name and details.
  BRANDS_SELECTOR = '.woodmart-product-brands-links'
  # Heading layout: a title optionally followed by "(date)". The optional
  # group is non-capturing so that only named captures are used.
  HEADING_PATTERN = /^(?<title>.+?)\s*(?:\((?<date>[^)]+)\))?$/
  # Text following the first "(" up to the next parenthesis (or the end).
  DETAILS_PATTERN = /\(([^()]*)/
  # Birth and passing dates are separated by a hyphen or an en dash.
  LIFE_SPAN_SEPARATOR = /-|–/

  class << self
    # Builds the list of artworks from the page.
    #
    # The list is rebuilt on every call, so calling this method repeatedly
    # never accumulates duplicated entries.
    #
    # @return [Array<Hash>] one Hash per artwork card
    def scrape
      populate_arts
    end

    private

    # The parsed page. It is downloaded on first use rather than when the
    # file is required, and then reused by later calls.
    def document
      # The block form of URI.open closes the downloaded stream once parsed.
      @document ||= URI.open(BASE_URL) { |page| Nokogiri::HTML(page) }
    end

    # @return [Array<Hash>] the attributes of every card in the document
    def populate_arts
      document.search(CARD_SELECTOR).map { |card| art_attributes(card) }
    end

    # Collects the attributes of a single card, tolerating missing nodes.
    def art_attributes(card)
      heading = card.at('h3')&.text

      {
        img_url: big_pic_url(image_source(card)),
        title: title(heading),
        date: date(heading),
        artist: card.at("#{BRANDS_SELECTOR} a")&.text,
        artist_details: artist_details(card.at(BRANDS_SELECTOR)&.text),
        tag: card.at('.woodmart-product-cats a')&.text
      }
    end

    # @return [String, nil] the src attribute of the card image, if any
    def image_source(card)
      image = card.at('img')
      image && image['src']
    end

    # Replaces the first "ftmp" of the URL with "sftb".
    # NOTE(review): presumably this switches from the thumbnail to a larger
    # rendition of the picture - confirm against the site's URL scheme.
    def big_pic_url(original_url)
      original_url&.sub('ftmp', 'sftb')
    end

    # @return [String, nil] the heading without its trailing "(date)" part
    def title(h3_text)
      heading_part(h3_text, 'title')
    end

    # @return [String, nil] the text between the heading's parentheses
    def date(h3_text)
      heading_part(h3_text, 'date')
    end

    # Extracts a named part of a heading; nil when the heading is missing,
    # empty or has no such part.
    def heading_part(h3_text, part)
      # The last character of the heading is discarded before matching.
      # NOTE(review): presumably a trailing separator emitted by the site's
      # markup - confirm.
      h3_text.to_s[..-2][HEADING_PATTERN, part]
    end

    # Parses a text such as "Claude Monet (French, 1840-1926)".
    #
    # @param div_text [String, nil] text of the artist element
    # @return [Hash] may hold :birth_date, :passing_date and :nationality;
    #   empty when there is no parenthesised part
    def artist_details(div_text)
      inside = div_text.to_s[DETAILS_PATTERN, 1].to_s
      details = inside.split(', ').map(&:strip).reject(&:empty?)
      return {} if details.empty?

      author_life_cycle(details).merge(nationality(details))
    end

    # The dates are read from the last entry of the details.
    def author_life_cycle(details)
      # A lone entry is reported as :birth_date as-is, without splitting.
      return { birth_date: details.first } if details.size == 1

      dates = details.last.delete(' ').split(LIFE_SPAN_SEPARATOR)
      return { birth_date: details.last } if dates.size == 1

      life_cycle_hash(dates)
    end

    def life_cycle_hash(life_cycle)
      {
        birth_date: life_cycle.first,
        passing_date: life_cycle.last
      }
    end

    # The nationality is the first entry, provided there are several.
    def nationality(details)
      return {} if details.size == 1

      { nationality: details.first }
    end
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: artvee_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Leon Siqueira
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-05-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A gem that gets titles, dates, artist, image URLs, etc. and returns as
14
+ a Hash
15
+ email: leon.siqueir4@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/artvee_scraper.rb
21
+ homepage: https://github.com/leon-siqueira/artvee-scraper
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubygems_version: 3.3.24
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Get art data from artvee.com
44
+ test_files: []