curation 1.10 → 1.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +33 -44
- data/curation.gemspec +1 -1
- data/lib/curation/version.rb +1 -1
- data/lib/curation.rb +30 -11
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bcbcb0a8ecb81b659e2ae2aa35e27acac6006265cf2743b6394f81203c634425
|
4
|
+
data.tar.gz: 25b25c7f30be8f9b004cec7efbd41caae4e69e87000ff585b92c8377b7855439
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 420056127e2c0ca86a4ad2e08fb37aa3da31a394264e4d7807a844dd9db61bc44124d6e978a076d9f50a1df37f7a5f80cc782145f54d11c7c3ce00566feca80d
|
7
|
+
data.tar.gz: a3b387e1c7345968c0eaaf1d2e8fbdbf752aa8b7a8b1e3b3807f206c43e7b096419b61b907622065f690d00a55b933038f0f3ed1b4fefa4d970f272afc1438d5
|
data/Gemfile.lock
CHANGED
@@ -1,88 +1,77 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
curation (1.10)
|
4
|
+
curation (1.11)
|
5
5
|
htmlentities
|
6
|
-
metainspector
|
6
|
+
metainspector
|
7
7
|
nokogiri
|
8
8
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
|
-
addressable (2.8.
|
13
|
-
public_suffix (>= 2.0.2, <
|
12
|
+
addressable (2.8.5)
|
13
|
+
public_suffix (>= 2.0.2, < 6.0)
|
14
14
|
ansi (1.5.0)
|
15
15
|
builder (3.2.4)
|
16
16
|
byebug (11.1.3)
|
17
17
|
domain_name (0.5.20190701)
|
18
18
|
unf (>= 0.0.5, < 1.0.0)
|
19
|
-
faraday (
|
20
|
-
faraday-em_http (~> 1.0)
|
21
|
-
faraday-em_synchrony (~> 1.0)
|
22
|
-
faraday-excon (~> 1.1)
|
23
|
-
faraday-httpclient (~> 1.0)
|
24
|
-
faraday-multipart (~> 1.0)
|
25
|
-
faraday-net_http (~> 1.0)
|
26
|
-
faraday-net_http_persistent (~> 1.0)
|
27
|
-
faraday-patron (~> 1.0)
|
28
|
-
faraday-rack (~> 1.0)
|
29
|
-
faraday-retry (~> 1.0)
|
19
|
+
faraday (2.7.10)
|
20
|
+
faraday-net_http (>= 2.0, < 3.1)
|
30
21
|
ruby2_keywords (>= 0.0.4)
|
31
22
|
faraday-cookie_jar (0.0.7)
|
32
23
|
faraday (>= 0.8.0)
|
33
24
|
http-cookie (~> 1.0.0)
|
34
|
-
faraday-em_http (1.0.0)
|
35
|
-
faraday-em_synchrony (1.0.0)
|
36
25
|
faraday-encoding (0.0.5)
|
37
26
|
faraday
|
38
|
-
faraday-
|
39
|
-
|
27
|
+
faraday-follow_redirects (0.3.0)
|
28
|
+
faraday (>= 1, < 3)
|
29
|
+
faraday-gzip (1.0.0)
|
30
|
+
faraday (>= 1.0)
|
31
|
+
zlib (~> 2.1)
|
32
|
+
faraday-http-cache (2.5.0)
|
40
33
|
faraday (>= 0.8)
|
41
|
-
faraday-
|
42
|
-
faraday-
|
43
|
-
|
44
|
-
|
45
|
-
faraday-net_http_persistent (1.2.0)
|
46
|
-
faraday-patron (1.0.0)
|
47
|
-
faraday-rack (1.0.0)
|
48
|
-
faraday-retry (1.0.3)
|
49
|
-
faraday_middleware (1.2.0)
|
50
|
-
faraday (~> 1.0)
|
51
|
-
fastimage (2.2.6)
|
34
|
+
faraday-net_http (3.0.2)
|
35
|
+
faraday-retry (2.2.0)
|
36
|
+
faraday (~> 2.0)
|
37
|
+
fastimage (2.2.7)
|
52
38
|
htmlentities (4.3.4)
|
53
39
|
http-cookie (1.0.5)
|
54
40
|
domain_name (~> 0.5)
|
55
|
-
metainspector (5.
|
56
|
-
addressable (~> 2.
|
57
|
-
faraday (
|
41
|
+
metainspector (5.15.0)
|
42
|
+
addressable (~> 2.8.4)
|
43
|
+
faraday (~> 2.5)
|
58
44
|
faraday-cookie_jar (~> 0.0)
|
59
45
|
faraday-encoding (~> 0.0)
|
60
|
-
faraday-
|
61
|
-
|
46
|
+
faraday-follow_redirects (~> 0.3)
|
47
|
+
faraday-gzip (>= 0.1, < 2.0)
|
48
|
+
faraday-http-cache (~> 2.5)
|
49
|
+
faraday-retry (~> 2.0)
|
62
50
|
fastimage (~> 2.2)
|
63
51
|
nesty (~> 1.0)
|
64
|
-
nokogiri (~> 1.
|
65
|
-
minitest (5.
|
66
|
-
minitest-reporters (1.
|
52
|
+
nokogiri (~> 1.13)
|
53
|
+
minitest (5.19.0)
|
54
|
+
minitest-reporters (1.6.1)
|
67
55
|
ansi
|
68
56
|
builder
|
69
57
|
minitest (>= 5.0)
|
70
58
|
ruby-progressbar
|
71
|
-
multipart-post (2.2.0)
|
72
59
|
nesty (1.0.2)
|
73
|
-
nokogiri (1.
|
60
|
+
nokogiri (1.15.4-x86_64-darwin)
|
74
61
|
racc (~> 1.4)
|
75
|
-
public_suffix (
|
76
|
-
racc (1.
|
62
|
+
public_suffix (5.0.3)
|
63
|
+
racc (1.7.1)
|
77
64
|
rake (12.3.3)
|
78
|
-
ruby-progressbar (1.
|
65
|
+
ruby-progressbar (1.13.0)
|
79
66
|
ruby2_keywords (0.0.5)
|
80
67
|
unf (0.1.4)
|
81
68
|
unf_ext
|
82
69
|
unf_ext (0.0.8.2)
|
70
|
+
zlib (2.1.1)
|
83
71
|
|
84
72
|
PLATFORMS
|
85
73
|
x86_64-darwin-21
|
74
|
+
x86_64-darwin-22
|
86
75
|
|
87
76
|
DEPENDENCIES
|
88
77
|
byebug
|
@@ -92,4 +81,4 @@ DEPENDENCIES
|
|
92
81
|
rake (~> 12.0)
|
93
82
|
|
94
83
|
BUNDLED WITH
|
95
|
-
2.
|
84
|
+
2.4.6
|
data/curation.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.bindir = "exe"
|
22
22
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
23
23
|
spec.require_paths = ["lib"]
|
24
|
-
spec.add_dependency "metainspector"
|
24
|
+
spec.add_dependency "metainspector"
|
25
25
|
spec.add_dependency "nokogiri"
|
26
26
|
spec.add_dependency "htmlentities"
|
27
27
|
end
|
data/lib/curation/version.rb
CHANGED
data/lib/curation.rb
CHANGED
@@ -8,6 +8,7 @@ module Curation
|
|
8
8
|
|
9
9
|
class Page
|
10
10
|
attr_reader :url
|
11
|
+
attr_accessor :verbose
|
11
12
|
|
12
13
|
BLACKLIST = [
|
13
14
|
'head', 'script', 'style', 'iframe', 'nav', 'noscript', 'header', 'footer', 'aside',
|
@@ -23,6 +24,7 @@ module Curation
|
|
23
24
|
def initialize(url, html = nil)
|
24
25
|
@url = url.to_s.gsub('http://', 'https://')
|
25
26
|
@html = html
|
27
|
+
@verbose = false
|
26
28
|
end
|
27
29
|
|
28
30
|
def title
|
@@ -66,24 +68,37 @@ module Curation
|
|
66
68
|
return possibility unless possibility.to_s.empty?
|
67
69
|
end
|
68
70
|
rescue
|
69
|
-
|
71
|
+
log 'Curation::Page find_title error'
|
70
72
|
end
|
71
73
|
return ''
|
72
74
|
end
|
73
75
|
|
74
76
|
def find_image
|
77
|
+
log "Curation::Page find_image #{url}"
|
75
78
|
if json_ld.any?
|
76
79
|
json_ld.each do |ld|
|
77
80
|
ld = ld.first if ld.is_a?(Array)
|
78
81
|
if ld.has_key? 'image'
|
79
82
|
image_data = ld['image']
|
80
|
-
|
83
|
+
if image_data.is_a? String
|
84
|
+
log "Curation::Page find_image json_ld string"
|
85
|
+
return image_data
|
86
|
+
end
|
81
87
|
if image_data.is_a? Array
|
82
88
|
first = image_data.first
|
83
|
-
|
84
|
-
|
89
|
+
if first.is_a? String
|
90
|
+
log "Curation::Page find_image json_ld array"
|
91
|
+
return first
|
92
|
+
end
|
93
|
+
if first.is_a? Hash
|
94
|
+
log "Curation::Page find_image json_ld array url"
|
95
|
+
return first['url']
|
96
|
+
end
|
97
|
+
end
|
98
|
+
if image_data.is_a? Hash
|
99
|
+
log "Curation::Page find_image json_ld url"
|
100
|
+
return image_data['url']
|
85
101
|
end
|
86
|
-
return image_data['url'] if image_data.is_a? Hash
|
87
102
|
end
|
88
103
|
end
|
89
104
|
end
|
@@ -179,7 +194,7 @@ module Curation
|
|
179
194
|
@json_ld.flatten!
|
180
195
|
# require 'byebug'; byebug
|
181
196
|
rescue
|
182
|
-
|
197
|
+
log 'Curation::Page json_ld error'
|
183
198
|
end
|
184
199
|
end
|
185
200
|
@json_ld
|
@@ -194,7 +209,7 @@ module Curation
|
|
194
209
|
def file
|
195
210
|
@file ||= URI.open url, 'User-Agent' => "Mozilla/5.0"
|
196
211
|
rescue
|
197
|
-
|
212
|
+
log "Curation::Page file error with url #{url}"
|
198
213
|
end
|
199
214
|
|
200
215
|
def html
|
@@ -205,7 +220,7 @@ module Curation
|
|
205
220
|
end
|
206
221
|
@html
|
207
222
|
rescue
|
208
|
-
|
223
|
+
log "Curation::Page html error"
|
209
224
|
end
|
210
225
|
|
211
226
|
def nokogiri
|
@@ -220,7 +235,7 @@ module Curation
|
|
220
235
|
end
|
221
236
|
@nokogiri
|
222
237
|
rescue
|
223
|
-
|
238
|
+
log 'Curation::Page nokogiri error'
|
224
239
|
end
|
225
240
|
|
226
241
|
def metainspector
|
@@ -230,13 +245,13 @@ module Curation
|
|
230
245
|
end
|
231
246
|
@metainspector
|
232
247
|
rescue
|
233
|
-
|
248
|
+
log 'Curation::Page metainspector error'
|
234
249
|
end
|
235
250
|
|
236
251
|
def metatags
|
237
252
|
@metatags ||= metainspector.meta_tag['name']
|
238
253
|
rescue
|
239
|
-
|
254
|
+
log 'Curation::Page metatags error'
|
240
255
|
end
|
241
256
|
|
242
257
|
# réforme -> réforme
|
@@ -259,5 +274,9 @@ module Curation
|
|
259
274
|
text
|
260
275
|
end
|
261
276
|
end
|
277
|
+
|
278
|
+
def log(message)
|
279
|
+
puts message if verbose
|
280
|
+
end
|
262
281
|
end
|
263
282
|
end
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: curation
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.10'
|
4
|
+
version: '1.11'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arnaud Levy
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-08-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: metainspector
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,7 +94,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
94
94
|
- !ruby/object:Gem::Version
|
95
95
|
version: '0'
|
96
96
|
requirements: []
|
97
|
-
rubygems_version: 3.
|
97
|
+
rubygems_version: 3.4.6
|
98
98
|
signing_key:
|
99
99
|
specification_version: 4
|
100
100
|
summary: Curation of content
|