curation 1.10 → 1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +33 -44
- data/curation.gemspec +1 -1
- data/lib/curation/version.rb +1 -1
- data/lib/curation.rb +30 -11
- metadata +7 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bcbcb0a8ecb81b659e2ae2aa35e27acac6006265cf2743b6394f81203c634425
|
|
4
|
+
data.tar.gz: 25b25c7f30be8f9b004cec7efbd41caae4e69e87000ff585b92c8377b7855439
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 420056127e2c0ca86a4ad2e08fb37aa3da31a394264e4d7807a844dd9db61bc44124d6e978a076d9f50a1df37f7a5f80cc782145f54d11c7c3ce00566feca80d
|
|
7
|
+
data.tar.gz: a3b387e1c7345968c0eaaf1d2e8fbdbf752aa8b7a8b1e3b3807f206c43e7b096419b61b907622065f690d00a55b933038f0f3ed1b4fefa4d970f272afc1438d5
|
data/Gemfile.lock
CHANGED
|
@@ -1,88 +1,77 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
curation (1.
|
|
4
|
+
curation (1.11)
|
|
5
5
|
htmlentities
|
|
6
|
-
metainspector
|
|
6
|
+
metainspector
|
|
7
7
|
nokogiri
|
|
8
8
|
|
|
9
9
|
GEM
|
|
10
10
|
remote: https://rubygems.org/
|
|
11
11
|
specs:
|
|
12
|
-
addressable (2.8.
|
|
13
|
-
public_suffix (>= 2.0.2, <
|
|
12
|
+
addressable (2.8.5)
|
|
13
|
+
public_suffix (>= 2.0.2, < 6.0)
|
|
14
14
|
ansi (1.5.0)
|
|
15
15
|
builder (3.2.4)
|
|
16
16
|
byebug (11.1.3)
|
|
17
17
|
domain_name (0.5.20190701)
|
|
18
18
|
unf (>= 0.0.5, < 1.0.0)
|
|
19
|
-
faraday (
|
|
20
|
-
faraday-
|
|
21
|
-
faraday-em_synchrony (~> 1.0)
|
|
22
|
-
faraday-excon (~> 1.1)
|
|
23
|
-
faraday-httpclient (~> 1.0)
|
|
24
|
-
faraday-multipart (~> 1.0)
|
|
25
|
-
faraday-net_http (~> 1.0)
|
|
26
|
-
faraday-net_http_persistent (~> 1.0)
|
|
27
|
-
faraday-patron (~> 1.0)
|
|
28
|
-
faraday-rack (~> 1.0)
|
|
29
|
-
faraday-retry (~> 1.0)
|
|
19
|
+
faraday (2.7.10)
|
|
20
|
+
faraday-net_http (>= 2.0, < 3.1)
|
|
30
21
|
ruby2_keywords (>= 0.0.4)
|
|
31
22
|
faraday-cookie_jar (0.0.7)
|
|
32
23
|
faraday (>= 0.8.0)
|
|
33
24
|
http-cookie (~> 1.0.0)
|
|
34
|
-
faraday-em_http (1.0.0)
|
|
35
|
-
faraday-em_synchrony (1.0.0)
|
|
36
25
|
faraday-encoding (0.0.5)
|
|
37
26
|
faraday
|
|
38
|
-
faraday-
|
|
39
|
-
|
|
27
|
+
faraday-follow_redirects (0.3.0)
|
|
28
|
+
faraday (>= 1, < 3)
|
|
29
|
+
faraday-gzip (1.0.0)
|
|
30
|
+
faraday (>= 1.0)
|
|
31
|
+
zlib (~> 2.1)
|
|
32
|
+
faraday-http-cache (2.5.0)
|
|
40
33
|
faraday (>= 0.8)
|
|
41
|
-
faraday-
|
|
42
|
-
faraday-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
faraday-net_http_persistent (1.2.0)
|
|
46
|
-
faraday-patron (1.0.0)
|
|
47
|
-
faraday-rack (1.0.0)
|
|
48
|
-
faraday-retry (1.0.3)
|
|
49
|
-
faraday_middleware (1.2.0)
|
|
50
|
-
faraday (~> 1.0)
|
|
51
|
-
fastimage (2.2.6)
|
|
34
|
+
faraday-net_http (3.0.2)
|
|
35
|
+
faraday-retry (2.2.0)
|
|
36
|
+
faraday (~> 2.0)
|
|
37
|
+
fastimage (2.2.7)
|
|
52
38
|
htmlentities (4.3.4)
|
|
53
39
|
http-cookie (1.0.5)
|
|
54
40
|
domain_name (~> 0.5)
|
|
55
|
-
metainspector (5.
|
|
56
|
-
addressable (~> 2.
|
|
57
|
-
faraday (
|
|
41
|
+
metainspector (5.15.0)
|
|
42
|
+
addressable (~> 2.8.4)
|
|
43
|
+
faraday (~> 2.5)
|
|
58
44
|
faraday-cookie_jar (~> 0.0)
|
|
59
45
|
faraday-encoding (~> 0.0)
|
|
60
|
-
faraday-
|
|
61
|
-
|
|
46
|
+
faraday-follow_redirects (~> 0.3)
|
|
47
|
+
faraday-gzip (>= 0.1, < 2.0)
|
|
48
|
+
faraday-http-cache (~> 2.5)
|
|
49
|
+
faraday-retry (~> 2.0)
|
|
62
50
|
fastimage (~> 2.2)
|
|
63
51
|
nesty (~> 1.0)
|
|
64
|
-
nokogiri (~> 1.
|
|
65
|
-
minitest (5.
|
|
66
|
-
minitest-reporters (1.
|
|
52
|
+
nokogiri (~> 1.13)
|
|
53
|
+
minitest (5.19.0)
|
|
54
|
+
minitest-reporters (1.6.1)
|
|
67
55
|
ansi
|
|
68
56
|
builder
|
|
69
57
|
minitest (>= 5.0)
|
|
70
58
|
ruby-progressbar
|
|
71
|
-
multipart-post (2.2.0)
|
|
72
59
|
nesty (1.0.2)
|
|
73
|
-
nokogiri (1.
|
|
60
|
+
nokogiri (1.15.4-x86_64-darwin)
|
|
74
61
|
racc (~> 1.4)
|
|
75
|
-
public_suffix (
|
|
76
|
-
racc (1.
|
|
62
|
+
public_suffix (5.0.3)
|
|
63
|
+
racc (1.7.1)
|
|
77
64
|
rake (12.3.3)
|
|
78
|
-
ruby-progressbar (1.
|
|
65
|
+
ruby-progressbar (1.13.0)
|
|
79
66
|
ruby2_keywords (0.0.5)
|
|
80
67
|
unf (0.1.4)
|
|
81
68
|
unf_ext
|
|
82
69
|
unf_ext (0.0.8.2)
|
|
70
|
+
zlib (2.1.1)
|
|
83
71
|
|
|
84
72
|
PLATFORMS
|
|
85
73
|
x86_64-darwin-21
|
|
74
|
+
x86_64-darwin-22
|
|
86
75
|
|
|
87
76
|
DEPENDENCIES
|
|
88
77
|
byebug
|
|
@@ -92,4 +81,4 @@ DEPENDENCIES
|
|
|
92
81
|
rake (~> 12.0)
|
|
93
82
|
|
|
94
83
|
BUNDLED WITH
|
|
95
|
-
2.
|
|
84
|
+
2.4.6
|
data/curation.gemspec
CHANGED
|
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
|
21
21
|
spec.bindir = "exe"
|
|
22
22
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
23
23
|
spec.require_paths = ["lib"]
|
|
24
|
-
spec.add_dependency "metainspector"
|
|
24
|
+
spec.add_dependency "metainspector"
|
|
25
25
|
spec.add_dependency "nokogiri"
|
|
26
26
|
spec.add_dependency "htmlentities"
|
|
27
27
|
end
|
data/lib/curation/version.rb
CHANGED
data/lib/curation.rb
CHANGED
|
@@ -8,6 +8,7 @@ module Curation
|
|
|
8
8
|
|
|
9
9
|
class Page
|
|
10
10
|
attr_reader :url
|
|
11
|
+
attr_accessor :verbose
|
|
11
12
|
|
|
12
13
|
BLACKLIST = [
|
|
13
14
|
'head', 'script', 'style', 'iframe', 'nav', 'noscript', 'header', 'footer', 'aside',
|
|
@@ -23,6 +24,7 @@ module Curation
|
|
|
23
24
|
def initialize(url, html = nil)
|
|
24
25
|
@url = url.to_s.gsub('http://', 'https://')
|
|
25
26
|
@html = html
|
|
27
|
+
@verbose = false
|
|
26
28
|
end
|
|
27
29
|
|
|
28
30
|
def title
|
|
@@ -66,24 +68,37 @@ module Curation
|
|
|
66
68
|
return possibility unless possibility.to_s.empty?
|
|
67
69
|
end
|
|
68
70
|
rescue
|
|
69
|
-
|
|
71
|
+
log 'Curation::Page find_title error'
|
|
70
72
|
end
|
|
71
73
|
return ''
|
|
72
74
|
end
|
|
73
75
|
|
|
74
76
|
def find_image
|
|
77
|
+
log "Curation::Page find_image #{url}"
|
|
75
78
|
if json_ld.any?
|
|
76
79
|
json_ld.each do |ld|
|
|
77
80
|
ld = ld.first if ld.is_a?(Array)
|
|
78
81
|
if ld.has_key? 'image'
|
|
79
82
|
image_data = ld['image']
|
|
80
|
-
|
|
83
|
+
if image_data.is_a? String
|
|
84
|
+
log "Curation::Page find_image json_ld string"
|
|
85
|
+
return image_data
|
|
86
|
+
end
|
|
81
87
|
if image_data.is_a? Array
|
|
82
88
|
first = image_data.first
|
|
83
|
-
|
|
84
|
-
|
|
89
|
+
if first.is_a? String
|
|
90
|
+
log "Curation::Page find_image json_ld array"
|
|
91
|
+
return first
|
|
92
|
+
end
|
|
93
|
+
if first.is_a? Hash
|
|
94
|
+
log "Curation::Page find_image json_ld array url"
|
|
95
|
+
return first['url']
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
if image_data.is_a? Hash
|
|
99
|
+
log "Curation::Page find_image json_ld url"
|
|
100
|
+
return image_data['url']
|
|
85
101
|
end
|
|
86
|
-
return image_data['url'] if image_data.is_a? Hash
|
|
87
102
|
end
|
|
88
103
|
end
|
|
89
104
|
end
|
|
@@ -179,7 +194,7 @@ module Curation
|
|
|
179
194
|
@json_ld.flatten!
|
|
180
195
|
# require 'byebug'; byebug
|
|
181
196
|
rescue
|
|
182
|
-
|
|
197
|
+
log 'Curation::Page json_ld error'
|
|
183
198
|
end
|
|
184
199
|
end
|
|
185
200
|
@json_ld
|
|
@@ -194,7 +209,7 @@ module Curation
|
|
|
194
209
|
def file
|
|
195
210
|
@file ||= URI.open url, 'User-Agent' => "Mozilla/5.0"
|
|
196
211
|
rescue
|
|
197
|
-
|
|
212
|
+
log "Curation::Page file error with url #{url}"
|
|
198
213
|
end
|
|
199
214
|
|
|
200
215
|
def html
|
|
@@ -205,7 +220,7 @@ module Curation
|
|
|
205
220
|
end
|
|
206
221
|
@html
|
|
207
222
|
rescue
|
|
208
|
-
|
|
223
|
+
log "Curation::Page html error"
|
|
209
224
|
end
|
|
210
225
|
|
|
211
226
|
def nokogiri
|
|
@@ -220,7 +235,7 @@ module Curation
|
|
|
220
235
|
end
|
|
221
236
|
@nokogiri
|
|
222
237
|
rescue
|
|
223
|
-
|
|
238
|
+
log 'Curation::Page nokogiri error'
|
|
224
239
|
end
|
|
225
240
|
|
|
226
241
|
def metainspector
|
|
@@ -230,13 +245,13 @@ module Curation
|
|
|
230
245
|
end
|
|
231
246
|
@metainspector
|
|
232
247
|
rescue
|
|
233
|
-
|
|
248
|
+
log 'Curation::Page metainspector error'
|
|
234
249
|
end
|
|
235
250
|
|
|
236
251
|
def metatags
|
|
237
252
|
@metatags ||= metainspector.meta_tag['name']
|
|
238
253
|
rescue
|
|
239
|
-
|
|
254
|
+
log 'Curation::Page metatags error'
|
|
240
255
|
end
|
|
241
256
|
|
|
242
257
|
# réforme -> réforme
|
|
@@ -259,5 +274,9 @@ module Curation
|
|
|
259
274
|
text
|
|
260
275
|
end
|
|
261
276
|
end
|
|
277
|
+
|
|
278
|
+
def log(message)
|
|
279
|
+
puts message if verbose
|
|
280
|
+
end
|
|
262
281
|
end
|
|
263
282
|
end
|
metadata
CHANGED
|
@@ -1,29 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: curation
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '1.
|
|
4
|
+
version: '1.11'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Arnaud Levy
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2023-08-29 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: metainspector
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- - "
|
|
17
|
+
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '
|
|
19
|
+
version: '0'
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- - "
|
|
24
|
+
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '
|
|
26
|
+
version: '0'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
28
|
name: nokogiri
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -94,7 +94,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
94
94
|
- !ruby/object:Gem::Version
|
|
95
95
|
version: '0'
|
|
96
96
|
requirements: []
|
|
97
|
-
rubygems_version: 3.
|
|
97
|
+
rubygems_version: 3.4.6
|
|
98
98
|
signing_key:
|
|
99
99
|
specification_version: 4
|
|
100
100
|
summary: Curation of content
|