artvee_scraper 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/artvee_scraper.rb +72 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7b6975645d66e1f093ffb1a32bbc516039a25b3baeab5cc842c136bbc6a97911
|
4
|
+
data.tar.gz: a5a112f811c7117970718d956020707927f8b611ef3c659fc2e19ed59507c893
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 331227e2dce6602d4518bb364f70e5e273e7185f1f4b4c408e5e8f4ac4e60b6596e38b3e445e2bdf86730bf6aa411b2c4bc518f974c5bc4832176e4ca726fa56
|
7
|
+
data.tar.gz: 48936c5e55f8a103e056820469e5935424d7061fd9cfeb7b8ad96ed1c0eb3d2a562288bb43b10e45293f4c97e9ab2a28261c6547e0f6756585a7b6caefe41661
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# rubocop:disable Lint/MixedRegexpCaptureTypes
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'open-uri'
|
5
|
+
require 'nokogiri'
|
6
|
+
|
7
|
+
# Scrapes the artwork cards found on the page at BASE_URL and exposes them as
# plain hashes.
#
# The page is downloaded lazily on the first call to .scrape (not when this
# file is required) and then cached for later calls.
#
# @example
#   ArtveeScraper.scrape # => [{ img_url: ..., title: ..., date: ..., ... }, ...]
class ArtveeScraper
  BASE_URL = 'https://artvee.com/'

  # CSS selector matching one artwork card in the fetched document.
  CARD_SELECTOR = '.product-grid-item.product.woodmart-hover-tiled'

  # Splits a card heading into a title and an optional parenthesised date.
  # The optional group is non-capturing so only the named groups are captured.
  HEADING_PATTERN = /^(?<title>.+?)\s*(?:\((?<date>[^)]+)\))?$/

  # Separator between the birth and passing parts: hyphen or en dash.
  LIFE_SPAN_SEPARATOR = /-|–/

  class << self
    # Builds one hash per artwork card found in the document.
    #
    # A fresh array is returned on every call, so repeated calls never
    # accumulate duplicate entries.
    #
    # @param refresh [Boolean] when true, discards the cached page and
    #   downloads it again before scraping
    # @return [Array<Hash>] hashes with the keys :img_url, :title, :date,
    #   :artist, :artist_details and :tag; a value is nil (or {} for
    #   :artist_details) when the card lacks the corresponding node
    def scrape(refresh: false)
      @doc = nil if refresh
      document.search(CARD_SELECTOR).map { |card| art_from(card) }
    end

    private

    # Returns the parsed page, downloading it only the first time it is needed.
    def document
      @doc ||= Nokogiri::HTML(URI.open(BASE_URL))
    end

    # Converts a single card node into its hash representation.
    # Every lookup is nil-safe so one incomplete card cannot abort the scrape.
    def art_from(card)
      heading = card.at('h3')&.text

      {
        img_url: big_pic_url(card.at('img')&.[]('src')),
        title: title(heading),
        date: date(heading),
        artist: card.at('.woodmart-product-brands-links a')&.text,
        artist_details: artist_details(card.at('.woodmart-product-brands-links')&.text),
        tag: card.at('.woodmart-product-cats a')&.text
      }
    end

    # Rewrites the first 'ftmp' in the image URL to 'sftb'.
    # NOTE(review): presumably 'sftb' is the path of the larger rendition —
    # confirm against the site.
    #
    # @param original_url [String, nil]
    # @return [String, nil] nil when no URL was given
    def big_pic_url(original_url)
      original_url&.sub('ftmp', 'sftb')
    end

    # @param h3_text [String, nil] raw text of the card heading
    # @return [String, nil] the title part, or nil when the heading is
    #   missing or cannot be parsed
    def title(h3_text)
      match = heading_match(h3_text)
      match && match[:title]
    end

    # @param h3_text [String, nil] raw text of the card heading
    # @return [String, nil] the text inside the trailing parentheses, or nil
    #   when the heading has no such part
    def date(h3_text)
      match = heading_match(h3_text)
      match && match[:date]
    end

    # Matches the heading against HEADING_PATTERN after dropping its final
    # character.
    # NOTE(review): the dropped character is presumably a trailing separator
    # in the page markup — confirm against the live HTML.
    def heading_match(h3_text)
      h3_text.to_s[..-2].match(HEADING_PATTERN)
    end

    # Parses the comma-separated details that follow the first '(' in the
    # artist block, e.g. "Name (Dutch, 1853 - 1890)".
    #
    # @param div_text [String, nil] text of the artist block
    # @return [Hash] any of :birth_date, :passing_date and :nationality;
    #   empty when there is no parenthesised part or it holds no details
    def artist_details(div_text)
      segment = div_text.to_s.split('(')[1]
      return {} unless segment

      # Remove the closing parenthesis explicitly rather than blindly dropping
      # the last character, so trailing whitespace does not leave ')' behind.
      details = segment.strip.delete_suffix(')').split(', ')
      return {} if details.empty?

      author_life_cycle(details).merge(nationality(details))
    end

    # Derives the birth/passing entries from the last detail.
    # A lone detail is reported as :birth_date without being split.
    # NOTE(review): that means a lone "1853 - 1890" or a lone nationality both
    # end up under :birth_date — confirm this is intended.
    def author_life_cycle(details)
      return { birth_date: details.first } if details.size == 1

      years = details.last.delete(' ').split(LIFE_SPAN_SEPARATOR)
      return { birth_date: details.last } if years.size == 1

      life_cycle_hash(years)
    end

    # Maps the first and last parts of a split life span to hash entries.
    def life_cycle_hash(life_cycle)
      {
        birth_date: life_cycle.first,
        passing_date: life_cycle.last
      }
    end

    # The first detail is the nationality only when more details follow it.
    def nationality(details)
      return {} if details.size == 1

      { nationality: details.first }
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: artvee_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Leon Siqueira
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-05-25 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A gem that gets titles, dates, artist, image URLs, etc. and returns as
|
14
|
+
a Hash
|
15
|
+
email: leon.siqueir4@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/artvee_scraper.rb
|
21
|
+
homepage: https://github.com/leon-siqueira/artvee-scraper
|
22
|
+
licenses:
|
23
|
+
- MIT
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubygems_version: 3.3.24
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: Get art data from artvee.com
|
44
|
+
test_files: []
|